patch-realtime 903 KB

430914310143111431214313143141431514316143171431814319143201432114322143231432414325143261432714328143291433014331143321433314334143351433614337143381433914340143411434214343143441434514346143471434814349143501435114352143531435414355143561435714358143591436014361143621436314364143651436614367143681436914370143711437214373143741437514376143771437814379143801438114382143831438414385143861438714388143891439014391143921439314394143951439614397143981439914400144011440214403144041440514406144071440814409144101441114412144131441414415144161441714418144191442014421144221442314424144251442614427144281442914430144311443214433144341443514436144371443814439144401444114442144431444414445144461444714448144491445014451144521445314454144551445614457144581445914460144611446214463144641446514466144671446814469144701447114472144731447414475144761447714478144791448014481144821448314484144851448614487144881448914490144911449214493144941449514496144971449814499145001450114502145031450414505145061450714508145091451014511145121451314514145151451614517145181451914520145211452214523145241452514526145271452814529145301453114532145331453414535145361453714538145391454014541145421454314544145451454614547145481454914550145511455214553145541455514556145571455814559145601456114562145631456414565145661456714568145691457014571145721457314574145751457614577145781457914580145811458214583145841458514586145871458814589145901459114592145931459414595145961459714598145991460014601146021460314604146051460614607146081460914610146111461214613146141461514616146171461814619146201462114622146231462414625146261462714628146291463014631146321463314634146351463614637146381463914640146411464214643146441464514646146471464814649146501465114652146531465414655146561465714658146591466014661146621466314664146651466614667146681466914670146711467214673146741467514676146771467814679146801468114682146831468414685146861468714688146891469014691146921469314694146951469614697146981469914700147011470214703147041470514706147071470814709147101471114712147131471414715147161471714718147191472014721147221472314724147251472614727147281472914730147311473214733147341473514736147371473814739147401474114742147431474414745147461474714748147491475014751147521475314754147551475614757147581475914760147611476214763147641476514766147671476814769147701477114772147731477414775147761477714778147791478014781147821478314784147851478614787147881478914790147911479214793147941479514796147971479814799148001480114802148031480414805148061480714808148091481014811148121481314814148151481614817148181481914820148211482214823148241482514826148271482814829148301483114832148331483414835148361483714838148391484014841148421484314844148451484614847148481484914850148511485214853148541485514856148571485814859148601486114862148631486414865148661486714868148691487014871148721487314874148751487614877148781487914880148811488214883148841488514886148871488814889148901489114892148931489414895148961489714898148991490014901149021490314904149051490614907149081490914910149111491214913149141491514916149171491814919149201492114922149231492414925149261492714928149291493014931149321493314934149351493614937149381493914940149411494214943149441494514946149471494814949149501495114952149531495414955149561495714958149591496014961149621496314964149651496614967149681496914970149711497214973149741497514976149771497814979149801498114982149831498414985149861498714988149891499014991149921499314994149951499614997149981499915000150011500215003150041500515006150071500815009150101501115012150131501415015150161501715018150191
502015021150221502315024150251502615027150281502915030150311503215033150341503515036150371503815039150401504115042150431504415045150461504715048150491505015051150521505315054150551505615057150581505915060150611506215063150641506515066150671506815069150701507115072150731507415075150761507715078150791508015081150821508315084150851508615087150881508915090150911509215093150941509515096150971509815099151001510115102151031510415105151061510715108151091511015111151121511315114151151511615117151181511915120151211512215123151241512515126151271512815129151301513115132151331513415135151361513715138151391514015141151421514315144151451514615147151481514915150151511515215153151541515515156151571515815159151601516115162151631516415165151661516715168151691517015171151721517315174151751517615177151781517915180151811518215183151841518515186151871518815189151901519115192151931519415195151961519715198151991520015201152021520315204152051520615207152081520915210152111521215213152141521515216152171521815219152201522115222152231522415225152261522715228152291523015231152321523315234152351523615237152381523915240152411524215243152441524515246152471524815249152501525115252152531525415255152561525715258152591526015261152621526315264152651526615267152681526915270152711527215273152741527515276152771527815279152801528115282152831528415285152861528715288152891529015291152921529315294152951529615297152981529915300153011530215303153041530515306153071530815309153101531115312153131531415315153161531715318153191532015321153221532315324153251532615327153281532915330153311533215333153341533515336153371533815339153401534115342153431534415345153461534715348153491535015351153521535315354153551535615357153581535915360153611536215363153641536515366153671536815369153701537115372153731537415375153761537715378153791538015381153821538315384153851538615387153881538915390153911539215393153941539515396153971539815399154001540115402154031540415405154061540715408154091541015411154121541315414154151541615417154181541915420154211542215423154241542515426154271542815429154301543115432154331543415435154361543715438154391544015441154421544315444154451544615447154481544915450154511545215453154541545515456154571545815459154601546115462154631546415465154661546715468154691547015471154721547315474154751547615477154781547915480154811548215483154841548515486154871548815489154901549115492154931549415495154961549715498154991550015501155021550315504155051550615507155081550915510155111551215513155141551515516155171551815519155201552115522155231552415525155261552715528155291553015531155321553315534155351553615537155381553915540155411554215543155441554515546155471554815549155501555115552155531555415555155561555715558155591556015561155621556315564155651556615567155681556915570155711557215573155741557515576155771557815579155801558115582155831558415585155861558715588155891559015591155921559315594155951559615597155981559915600156011560215603156041560515606156071560815609156101561115612156131561415615156161561715618156191562015621156221562315624156251562615627156281562915630156311563215633156341563515636156371563815639156401564115642156431564415645156461564715648156491565015651156521565315654156551565615657156581565915660156611566215663156641566515666156671566815669156701567115672156731567415675156761567715678156791568015681156821568315684156851568615687156881568915690156911569215693156941569515696156971569815699157001570115702157031570415705157061570715708157091571015711157121571315714157151571615717157181571915720157211572215723157241572515726157271572815729157301
573115732157331573415735157361573715738157391574015741157421574315744157451574615747157481574915750157511575215753157541575515756157571575815759157601576115762157631576415765157661576715768157691577015771157721577315774157751577615777157781577915780157811578215783157841578515786157871578815789157901579115792157931579415795157961579715798157991580015801158021580315804158051580615807158081580915810158111581215813158141581515816158171581815819158201582115822158231582415825158261582715828158291583015831158321583315834158351583615837158381583915840158411584215843158441584515846158471584815849158501585115852158531585415855158561585715858158591586015861158621586315864158651586615867158681586915870158711587215873158741587515876158771587815879158801588115882158831588415885158861588715888158891589015891158921589315894158951589615897158981589915900159011590215903159041590515906159071590815909159101591115912159131591415915159161591715918159191592015921159221592315924159251592615927159281592915930159311593215933159341593515936159371593815939159401594115942159431594415945159461594715948159491595015951159521595315954159551595615957159581595915960159611596215963159641596515966159671596815969159701597115972159731597415975159761597715978159791598015981159821598315984159851598615987159881598915990159911599215993159941599515996159971599815999160001600116002160031600416005160061600716008160091601016011160121601316014160151601616017160181601916020160211602216023160241602516026160271602816029160301603116032160331603416035160361603716038160391604016041160421604316044160451604616047160481604916050160511605216053160541605516056160571605816059160601606116062160631606416065160661606716068160691607016071160721607316074160751607616077160781607916080160811608216083160841608516086160871608816089160901609116092160931609416095160961609716098160991610016101161021610316104161051610616107161081610916110161111611216113161141611516116161171611816119161201612116122161231612416125161261612716128161291613016131161321613316134161351613616137161381613916140161411614216143161441614516146161471614816149161501615116152161531615416155161561615716158161591616016161161621616316164161651616616167161681616916170161711617216173161741617516176161771617816179161801618116182161831618416185161861618716188161891619016191161921619316194161951619616197161981619916200162011620216203162041620516206162071620816209162101621116212162131621416215162161621716218162191622016221162221622316224162251622616227162281622916230162311623216233162341623516236162371623816239162401624116242162431624416245162461624716248162491625016251162521625316254162551625616257162581625916260162611626216263162641626516266162671626816269162701627116272162731627416275162761627716278162791628016281162821628316284162851628616287162881628916290162911629216293162941629516296162971629816299163001630116302163031630416305163061630716308163091631016311163121631316314163151631616317163181631916320163211632216323163241632516326163271632816329163301633116332163331633416335163361633716338163391634016341163421634316344163451634616347163481634916350163511635216353163541635516356163571635816359163601636116362163631636416365163661636716368163691637016371163721637316374163751637616377163781637916380163811638216383163841638516386163871638816389163901639116392163931639416395163961639716398163991640016401164021640316404164051640616407164081640916410164111641216413164141641516416164171641816419164201642116422164231642416425164261642716428164291643016431164321643316434164351643616437164381643916440164411
644216443164441644516446164471644816449164501645116452164531645416455164561645716458164591646016461164621646316464164651646616467164681646916470164711647216473164741647516476164771647816479164801648116482164831648416485164861648716488164891649016491164921649316494164951649616497164981649916500165011650216503165041650516506165071650816509165101651116512165131651416515165161651716518165191652016521165221652316524165251652616527165281652916530165311653216533165341653516536165371653816539165401654116542165431654416545165461654716548165491655016551165521655316554165551655616557165581655916560165611656216563165641656516566165671656816569165701657116572165731657416575165761657716578165791658016581165821658316584165851658616587165881658916590165911659216593165941659516596165971659816599166001660116602166031660416605166061660716608166091661016611166121661316614166151661616617166181661916620166211662216623166241662516626166271662816629166301663116632166331663416635166361663716638166391664016641166421664316644166451664616647166481664916650166511665216653166541665516656166571665816659166601666116662166631666416665166661666716668166691667016671166721667316674166751667616677166781667916680166811668216683166841668516686166871668816689166901669116692166931669416695166961669716698166991670016701167021670316704167051670616707167081670916710167111671216713167141671516716167171671816719167201672116722167231672416725167261672716728167291673016731167321673316734167351673616737167381673916740167411674216743167441674516746167471674816749167501675116752167531675416755167561675716758167591676016761167621676316764167651676616767167681676916770167711677216773167741677516776167771677816779167801678116782167831678416785167861678716788167891679016791167921679316794167951679616797167981679916800168011680216803168041680516806168071680816809168101681116812168131681416815168161681716818168191682016821168221682316824168251682616827168281682916830168311683216833168341683516836168371683816839168401684116842168431684416845168461684716848168491685016851168521685316854168551685616857168581685916860168611686216863168641686516866168671686816869168701687116872168731687416875168761687716878168791688016881168821688316884168851688616887168881688916890168911689216893168941689516896168971689816899169001690116902169031690416905169061690716908169091691016911169121691316914169151691616917169181691916920169211692216923169241692516926169271692816929169301693116932169331693416935169361693716938169391694016941169421694316944169451694616947169481694916950169511695216953169541695516956169571695816959169601696116962169631696416965169661696716968169691697016971169721697316974169751697616977169781697916980169811698216983169841698516986169871698816989169901699116992169931699416995169961699716998169991700017001170021700317004170051700617007170081700917010170111701217013170141701517016170171701817019170201702117022170231702417025170261702717028170291703017031170321703317034170351703617037170381703917040170411704217043170441704517046170471704817049170501705117052170531705417055170561705717058170591706017061170621706317064170651706617067170681706917070170711707217073170741707517076170771707817079170801708117082170831708417085170861708717088170891709017091170921709317094170951709617097170981709917100171011710217103171041710517106171071710817109171101711117112171131711417115171161711717118171191712017121171221712317124171251712617127171281712917130171311713217133171341713517136171371713817139171401714117142171431714417145171461714717148171491715017151171521
715317154171551715617157171581715917160171611716217163171641716517166171671716817169171701717117172171731717417175171761717717178171791718017181171821718317184171851718617187171881718917190171911719217193171941719517196171971719817199172001720117202172031720417205172061720717208172091721017211172121721317214172151721617217172181721917220172211722217223172241722517226172271722817229172301723117232172331723417235172361723717238172391724017241172421724317244172451724617247172481724917250172511725217253172541725517256172571725817259172601726117262172631726417265172661726717268172691727017271172721727317274172751727617277172781727917280172811728217283172841728517286172871728817289172901729117292172931729417295172961729717298172991730017301173021730317304173051730617307173081730917310173111731217313173141731517316173171731817319173201732117322173231732417325173261732717328173291733017331173321733317334173351733617337173381733917340173411734217343173441734517346173471734817349173501735117352173531735417355173561735717358173591736017361173621736317364173651736617367173681736917370173711737217373173741737517376173771737817379173801738117382173831738417385173861738717388173891739017391173921739317394173951739617397173981739917400174011740217403174041740517406174071740817409174101741117412174131741417415174161741717418174191742017421174221742317424174251742617427174281742917430174311743217433174341743517436174371743817439174401744117442174431744417445174461744717448174491745017451174521745317454174551745617457174581745917460174611746217463174641746517466174671746817469174701747117472174731747417475174761747717478174791748017481174821748317484174851748617487174881748917490174911749217493174941749517496174971749817499175001750117502175031750417505175061750717508175091751017511175121751317514175151751617517175181751917520175211752217523175241752517526175271752817529175301753117532175331753417535175361753717538175391754017541175421754317544175451754617547175481754917550175511755217553175541755517556175571755817559175601756117562175631756417565175661756717568175691757017571175721757317574175751757617577175781757917580175811758217583175841758517586175871758817589175901759117592175931759417595175961759717598175991760017601176021760317604176051760617607176081760917610176111761217613176141761517616176171761817619176201762117622176231762417625176261762717628176291763017631176321763317634176351763617637176381763917640176411764217643176441764517646176471764817649176501765117652176531765417655176561765717658176591766017661176621766317664176651766617667176681766917670176711767217673176741767517676176771767817679176801768117682176831768417685176861768717688176891769017691176921769317694176951769617697176981769917700177011770217703177041770517706177071770817709177101771117712177131771417715177161771717718177191772017721177221772317724177251772617727177281772917730177311773217733177341773517736177371773817739177401774117742177431774417745177461774717748177491775017751177521775317754177551775617757177581775917760177611776217763177641776517766177671776817769177701777117772177731777417775177761777717778177791778017781177821778317784177851778617787177881778917790177911779217793177941779517796177971779817799178001780117802178031780417805178061780717808178091781017811178121781317814178151781617817178181781917820178211782217823178241782517826178271782817829178301783117832178331783417835178361783717838178391784017841178421784317844178451784617847178481784917850178511785217853178541785517856178571785817859178601786117862178631
786417865178661786717868178691787017871178721787317874178751787617877178781787917880178811788217883178841788517886178871788817889178901789117892178931789417895178961789717898178991790017901179021790317904179051790617907179081790917910179111791217913179141791517916179171791817919179201792117922179231792417925179261792717928179291793017931179321793317934179351793617937179381793917940179411794217943179441794517946179471794817949179501795117952179531795417955179561795717958179591796017961179621796317964179651796617967179681796917970179711797217973179741797517976179771797817979179801798117982179831798417985179861798717988179891799017991179921799317994179951799617997179981799918000180011800218003180041800518006180071800818009180101801118012180131801418015180161801718018180191802018021180221802318024180251802618027180281802918030180311803218033180341803518036180371803818039180401804118042180431804418045180461804718048180491805018051180521805318054180551805618057180581805918060180611806218063180641806518066180671806818069180701807118072180731807418075180761807718078180791808018081180821808318084180851808618087180881808918090180911809218093180941809518096180971809818099181001810118102181031810418105181061810718108181091811018111181121811318114181151811618117181181811918120181211812218123181241812518126181271812818129181301813118132181331813418135181361813718138181391814018141181421814318144181451814618147181481814918150181511815218153181541815518156181571815818159181601816118162181631816418165181661816718168181691817018171181721817318174181751817618177181781817918180181811818218183181841818518186181871818818189181901819118192181931819418195181961819718198181991820018201182021820318204182051820618207182081820918210182111821218213182141821518216182171821818219182201822118222182231822418225182261822718228182291823018231182321823318234182351823618237182381823918240182411824218243182441824518246182471824818249182501825118252182531825418255182561825718258182591826018261182621826318264182651826618267182681826918270182711827218273182741827518276182771827818279182801828118282182831828418285182861828718288182891829018291182921829318294182951829618297182981829918300183011830218303183041830518306183071830818309183101831118312183131831418315183161831718318183191832018321183221832318324183251832618327183281832918330183311833218333183341833518336183371833818339183401834118342183431834418345183461834718348183491835018351183521835318354183551835618357183581835918360183611836218363183641836518366183671836818369183701837118372183731837418375183761837718378183791838018381183821838318384183851838618387183881838918390183911839218393183941839518396183971839818399184001840118402184031840418405184061840718408184091841018411184121841318414184151841618417184181841918420184211842218423184241842518426184271842818429184301843118432184331843418435184361843718438184391844018441184421844318444184451844618447184481844918450184511845218453184541845518456184571845818459184601846118462184631846418465184661846718468184691847018471184721847318474184751847618477184781847918480184811848218483184841848518486184871848818489184901849118492184931849418495184961849718498184991850018501185021850318504185051850618507185081850918510185111851218513185141851518516185171851818519185201852118522185231852418525185261852718528185291853018531185321853318534185351853618537185381853918540185411854218543185441854518546185471854818549185501855118552185531855418555185561855718558185591856018561185621856318564185651856618567185681856918570185711857218573185741
857518576185771857818579185801858118582185831858418585185861858718588185891859018591185921859318594185951859618597185981859918600186011860218603186041860518606186071860818609186101861118612186131861418615186161861718618186191862018621186221862318624186251862618627186281862918630186311863218633186341863518636186371863818639186401864118642186431864418645186461864718648186491865018651186521865318654186551865618657186581865918660186611866218663186641866518666186671866818669186701867118672186731867418675186761867718678186791868018681186821868318684186851868618687186881868918690186911869218693186941869518696186971869818699187001870118702187031870418705187061870718708187091871018711187121871318714187151871618717187181871918720187211872218723187241872518726187271872818729187301873118732187331873418735187361873718738187391874018741187421874318744187451874618747187481874918750187511875218753187541875518756187571875818759187601876118762187631876418765187661876718768187691877018771187721877318774187751877618777187781877918780187811878218783187841878518786187871878818789187901879118792187931879418795187961879718798187991880018801188021880318804188051880618807188081880918810188111881218813188141881518816188171881818819188201882118822188231882418825188261882718828188291883018831188321883318834188351883618837188381883918840188411884218843188441884518846188471884818849188501885118852188531885418855188561885718858188591886018861188621886318864188651886618867188681886918870188711887218873188741887518876188771887818879188801888118882188831888418885188861888718888188891889018891188921889318894188951889618897188981889918900189011890218903189041890518906189071890818909189101891118912189131891418915189161891718918189191892018921189221892318924189251892618927189281892918930189311893218933189341893518936189371893818939189401894118942189431894418945189461894718948189491895018951189521895318954189551895618957189581895918960189611896218963189641896518966189671896818969189701897118972189731897418975189761897718978189791898018981189821898318984189851898618987189881898918990189911899218993189941899518996189971899818999190001900119002190031900419005190061900719008190091901019011190121901319014190151901619017190181901919020190211902219023190241902519026190271902819029190301903119032190331903419035190361903719038190391904019041190421904319044190451904619047190481904919050190511905219053190541905519056190571905819059190601906119062190631906419065190661906719068190691907019071190721907319074190751907619077190781907919080190811908219083190841908519086190871908819089190901909119092190931909419095190961909719098190991910019101191021910319104191051910619107191081910919110191111911219113191141911519116191171911819119191201912119122191231912419125191261912719128191291913019131191321913319134191351913619137191381913919140191411914219143191441914519146191471914819149191501915119152191531915419155191561915719158191591916019161191621916319164191651916619167191681916919170191711917219173191741917519176191771917819179191801918119182191831918419185191861918719188191891919019191191921919319194191951919619197191981919919200192011920219203192041920519206192071920819209192101921119212192131921419215192161921719218192191922019221192221922319224192251922619227192281922919230192311923219233192341923519236192371923819239192401924119242192431924419245192461924719248192491925019251192521925319254192551925619257192581925919260192611926219263192641926519266192671926819269192701927119272192731927419275192761927719278192791928019281192821928319284192851
928619287192881928919290192911929219293192941929519296192971929819299193001930119302193031930419305193061930719308193091931019311193121931319314193151931619317193181931919320193211932219323193241932519326193271932819329193301933119332193331933419335193361933719338193391934019341193421934319344193451934619347193481934919350193511935219353193541935519356193571935819359193601936119362193631936419365193661936719368193691937019371193721937319374193751937619377193781937919380193811938219383193841938519386193871938819389193901939119392193931939419395193961939719398193991940019401194021940319404194051940619407194081940919410194111941219413194141941519416194171941819419194201942119422194231942419425194261942719428194291943019431194321943319434194351943619437194381943919440194411944219443194441944519446194471944819449194501945119452194531945419455194561945719458194591946019461194621946319464194651946619467194681946919470194711947219473194741947519476194771947819479194801948119482194831948419485194861948719488194891949019491194921949319494194951949619497194981949919500195011950219503195041950519506195071950819509195101951119512195131951419515195161951719518195191952019521195221952319524195251952619527195281952919530195311953219533195341953519536195371953819539195401954119542195431954419545195461954719548195491955019551195521955319554195551955619557195581955919560195611956219563195641956519566195671956819569195701957119572195731957419575195761957719578195791958019581195821958319584195851958619587195881958919590195911959219593195941959519596195971959819599196001960119602196031960419605196061960719608196091961019611196121961319614196151961619617196181961919620196211962219623196241962519626196271962819629196301963119632196331963419635196361963719638196391964019641196421964319644196451964619647196481964919650196511965219653196541965519656196571965819659196601966119662196631966419665196661966719668196691967019671196721967319674196751967619677196781967919680196811968219683196841968519686196871968819689196901969119692196931969419695196961969719698196991970019701197021970319704197051970619707197081970919710197111971219713197141971519716197171971819719197201972119722197231972419725197261972719728197291973019731197321973319734197351973619737197381973919740197411974219743197441974519746197471974819749197501975119752197531975419755197561975719758197591976019761197621976319764197651976619767197681976919770197711977219773197741977519776197771977819779197801978119782197831978419785197861978719788197891979019791197921979319794197951979619797197981979919800198011980219803198041980519806198071980819809198101981119812198131981419815198161981719818198191982019821198221982319824198251982619827198281982919830198311983219833198341983519836198371983819839198401984119842198431984419845198461984719848198491985019851198521985319854198551985619857198581985919860198611986219863198641986519866198671986819869198701987119872198731987419875198761987719878198791988019881198821988319884198851988619887198881988919890198911989219893198941989519896198971989819899199001990119902199031990419905199061990719908199091991019911199121991319914199151991619917199181991919920199211992219923199241992519926199271992819929199301993119932199331993419935199361993719938199391994019941199421994319944199451994619947199481994919950199511995219953199541995519956199571995819959199601996119962199631996419965199661996719968199691997019971199721997319974199751997619977199781997919980199811998219983199841998519986199871998819989199901999119992199931999419995199961
999719998199992000020001200022000320004200052000620007200082000920010200112001220013200142001520016200172001820019200202002120022200232002420025200262002720028200292003020031200322003320034200352003620037200382003920040200412004220043200442004520046200472004820049200502005120052200532005420055200562005720058200592006020061200622006320064200652006620067200682006920070200712007220073200742007520076200772007820079200802008120082200832008420085200862008720088200892009020091200922009320094200952009620097200982009920100201012010220103201042010520106201072010820109201102011120112201132011420115201162011720118201192012020121201222012320124201252012620127201282012920130201312013220133201342013520136201372013820139201402014120142201432014420145201462014720148201492015020151201522015320154201552015620157201582015920160201612016220163201642016520166201672016820169201702017120172201732017420175201762017720178201792018020181201822018320184201852018620187201882018920190201912019220193201942019520196201972019820199202002020120202202032020420205202062020720208202092021020211202122021320214202152021620217202182021920220202212022220223202242022520226202272022820229202302023120232202332023420235202362023720238202392024020241202422024320244202452024620247202482024920250202512025220253202542025520256202572025820259202602026120262202632026420265202662026720268202692027020271202722027320274202752027620277202782027920280202812028220283202842028520286202872028820289202902029120292202932029420295202962029720298202992030020301203022030320304203052030620307203082030920310203112031220313203142031520316203172031820319203202032120322203232032420325203262032720328203292033020331203322033320334203352033620337203382033920340203412034220343203442034520346203472034820349203502035120352203532035420355203562035720358203592036020361203622036320364203652036620367203682036920370203712037220373203742037520376203772037820379203802038120382203832038420385203862038720388203892039020391203922039320394203952039620397203982039920400204012040220403204042040520406204072040820409204102041120412204132041420415204162041720418204192042020421204222042320424204252042620427204282042920430204312043220433204342043520436204372043820439204402044120442204432044420445204462044720448204492045020451204522045320454204552045620457204582045920460204612046220463204642046520466204672046820469204702047120472204732047420475204762047720478204792048020481204822048320484204852048620487204882048920490204912049220493204942049520496204972049820499205002050120502205032050420505205062050720508205092051020511205122051320514205152051620517205182051920520205212052220523205242052520526205272052820529205302053120532205332053420535205362053720538205392054020541205422054320544205452054620547205482054920550205512055220553205542055520556205572055820559205602056120562205632056420565205662056720568205692057020571205722057320574205752057620577205782057920580205812058220583205842058520586205872058820589205902059120592205932059420595205962059720598205992060020601206022060320604206052060620607206082060920610206112061220613206142061520616206172061820619206202062120622206232062420625206262062720628206292063020631206322063320634206352063620637206382063920640206412064220643206442064520646206472064820649206502065120652206532065420655206562065720658206592066020661206622066320664206652066620667206682066920670206712067220673206742067520676206772067820679206802068120682206832068420685206862068720688206892069020691206922069320694206952069620697206982069920700207012070220703207042070520706207072
070820709207102071120712207132071420715207162071720718207192072020721207222072320724207252072620727207282072920730207312073220733207342073520736207372073820739207402074120742207432074420745207462074720748207492075020751207522075320754207552075620757207582075920760207612076220763207642076520766207672076820769207702077120772207732077420775207762077720778207792078020781207822078320784207852078620787207882078920790207912079220793207942079520796207972079820799208002080120802208032080420805208062080720808208092081020811208122081320814208152081620817208182081920820208212082220823208242082520826208272082820829208302083120832208332083420835208362083720838208392084020841208422084320844208452084620847208482084920850208512085220853208542085520856208572085820859208602086120862208632086420865208662086720868208692087020871208722087320874208752087620877208782087920880208812088220883208842088520886208872088820889208902089120892208932089420895208962089720898208992090020901209022090320904209052090620907209082090920910209112091220913209142091520916209172091820919209202092120922209232092420925209262092720928209292093020931209322093320934209352093620937209382093920940209412094220943209442094520946209472094820949209502095120952209532095420955209562095720958209592096020961209622096320964209652096620967209682096920970209712097220973209742097520976209772097820979209802098120982209832098420985209862098720988209892099020991209922099320994209952099620997209982099921000210012100221003210042100521006210072100821009210102101121012210132101421015210162101721018210192102021021210222102321024210252102621027210282102921030210312103221033210342103521036210372103821039210402104121042210432104421045210462104721048210492105021051210522105321054210552105621057210582105921060210612106221063210642106521066210672106821069210702107121072210732107421075210762107721078210792108021081210822108321084210852108621087210882108921090210912109221093210942109521096210972109821099211002110121102211032110421105211062110721108211092111021111211122111321114211152111621117211182111921120211212112221123211242112521126211272112821129211302113121132211332113421135211362113721138211392114021141211422114321144211452114621147211482114921150211512115221153211542115521156211572115821159211602116121162211632116421165211662116721168211692117021171211722117321174211752117621177211782117921180211812118221183211842118521186211872118821189211902119121192211932119421195211962119721198211992120021201212022120321204212052120621207212082120921210212112121221213212142121521216212172121821219212202122121222212232122421225212262122721228212292123021231212322123321234212352123621237212382123921240212412124221243212442124521246212472124821249212502125121252212532125421255212562125721258212592126021261212622126321264212652126621267212682126921270212712127221273212742127521276212772127821279212802128121282212832128421285212862128721288212892129021291212922129321294212952129621297212982129921300213012130221303213042130521306213072130821309213102131121312213132131421315213162131721318213192132021321213222132321324213252132621327213282132921330213312133221333213342133521336213372133821339213402134121342213432134421345213462134721348213492135021351213522135321354213552135621357213582135921360213612136221363213642136521366213672136821369213702137121372213732137421375213762137721378213792138021381213822138321384213852138621387213882138921390213912139221393213942139521396213972139821399214002140121402214032140421405214062140721408214092141021411214122141321414214152141621417214182
141921420214212142221423214242142521426214272142821429214302143121432214332143421435214362143721438214392144021441214422144321444214452144621447214482144921450214512145221453214542145521456214572145821459214602146121462214632146421465214662146721468214692147021471214722147321474214752147621477214782147921480214812148221483214842148521486214872148821489214902149121492214932149421495214962149721498214992150021501215022150321504215052150621507215082150921510215112151221513215142151521516215172151821519215202152121522215232152421525215262152721528215292153021531215322153321534215352153621537215382153921540215412154221543215442154521546215472154821549215502155121552215532155421555215562155721558215592156021561215622156321564215652156621567215682156921570215712157221573215742157521576215772157821579215802158121582215832158421585215862158721588215892159021591215922159321594215952159621597215982159921600216012160221603216042160521606216072160821609216102161121612216132161421615216162161721618216192162021621216222162321624216252162621627216282162921630216312163221633216342163521636216372163821639216402164121642216432164421645216462164721648216492165021651216522165321654216552165621657216582165921660216612166221663216642166521666216672166821669216702167121672216732167421675216762167721678216792168021681216822168321684216852168621687216882168921690216912169221693216942169521696216972169821699217002170121702217032170421705217062170721708217092171021711217122171321714217152171621717217182171921720217212172221723217242172521726217272172821729217302173121732217332173421735217362173721738217392174021741217422174321744217452174621747217482174921750217512175221753217542175521756217572175821759217602176121762217632176421765217662176721768217692177021771217722177321774217752177621777217782177921780217812178221783217842178521786217872178821789217902179121792217932179421795217962179721798217992180021801218022180321804218052180621807218082180921810218112181221813218142181521816218172181821819218202182121822218232182421825218262182721828218292183021831218322183321834218352183621837218382183921840218412184221843218442184521846218472184821849218502185121852218532185421855218562185721858218592186021861218622186321864218652186621867218682186921870218712187221873218742187521876218772187821879218802188121882218832188421885218862188721888218892189021891218922189321894218952189621897218982189921900219012190221903219042190521906219072190821909219102191121912219132191421915219162191721918219192192021921219222192321924219252192621927219282192921930219312193221933219342193521936219372193821939219402194121942219432194421945219462194721948219492195021951219522195321954219552195621957219582195921960219612196221963219642196521966219672196821969219702197121972219732197421975219762197721978219792198021981219822198321984219852198621987219882198921990219912199221993219942199521996219972199821999220002200122002220032200422005220062200722008220092201022011220122201322014220152201622017220182201922020220212202222023220242202522026220272202822029220302203122032220332203422035220362203722038220392204022041220422204322044220452204622047220482204922050220512205222053220542205522056220572205822059220602206122062220632206422065220662206722068220692207022071220722207322074220752207622077220782207922080220812208222083220842208522086220872208822089220902209122092220932209422095220962209722098220992210022101221022210322104221052210622107221082210922110221112211222113221142211522116221172211822119221202212122122221232212422125221262212722128221292
213022131221322213322134221352213622137221382213922140221412214222143221442214522146221472214822149221502215122152221532215422155221562215722158221592216022161221622216322164221652216622167221682216922170221712217222173221742217522176221772217822179221802218122182221832218422185221862218722188221892219022191221922219322194221952219622197221982219922200222012220222203222042220522206222072220822209222102221122212222132221422215222162221722218222192222022221222222222322224222252222622227222282222922230222312223222233222342223522236222372223822239222402224122242222432224422245222462224722248222492225022251222522225322254222552225622257222582225922260222612226222263222642226522266222672226822269222702227122272222732227422275222762227722278222792228022281222822228322284222852228622287222882228922290222912229222293222942229522296222972229822299223002230122302223032230422305223062230722308223092231022311223122231322314223152231622317223182231922320223212232222323223242232522326223272232822329223302233122332223332233422335223362233722338223392234022341223422234322344223452234622347223482234922350223512235222353223542235522356223572235822359223602236122362223632236422365223662236722368223692237022371223722237322374223752237622377223782237922380223812238222383223842238522386223872238822389223902239122392223932239422395223962239722398223992240022401224022240322404224052240622407224082240922410224112241222413224142241522416224172241822419224202242122422224232242422425224262242722428224292243022431224322243322434224352243622437224382243922440224412244222443224442244522446224472244822449224502245122452224532245422455224562245722458224592246022461224622246322464224652246622467224682246922470224712247222473224742247522476224772247822479224802248122482224832248422485224862248722488224892249022491224922249322494224952249622497224982249922500225012250222503225042250522506225072250822509225102251122512225132251422515225162251722518225192252022521225222252322524225252252622527225282252922530225312253222533225342253522536225372253822539225402254122542225432254422545225462254722548225492255022551225522255322554225552255622557225582255922560225612256222563225642256522566225672256822569225702257122572225732257422575225762257722578225792258022581225822258322584225852258622587225882258922590225912259222593225942259522596225972259822599226002260122602226032260422605226062260722608226092261022611226122261322614226152261622617226182261922620226212262222623226242262522626226272262822629226302263122632226332263422635226362263722638226392264022641226422264322644226452264622647226482264922650226512265222653226542265522656226572265822659226602266122662226632266422665226662266722668226692267022671226722267322674226752267622677226782267922680226812268222683226842268522686226872268822689226902269122692226932269422695226962269722698226992270022701227022270322704227052270622707227082270922710227112271222713227142271522716227172271822719227202272122722227232272422725227262272722728227292273022731227322273322734227352273622737227382273922740227412274222743227442274522746227472274822749227502275122752227532275422755227562275722758227592276022761227622276322764227652276622767227682276922770227712277222773227742277522776227772277822779227802278122782227832278422785227862278722788227892279022791227922279322794227952279622797227982279922800228012280222803228042280522806228072280822809228102281122812228132281422815228162281722818228192282022821228222282322824228252282622827228282282922830228312283222833228342283522836228372283822839228402
284122842228432284422845228462284722848228492285022851228522285322854228552285622857228582285922860228612286222863228642286522866228672286822869228702287122872228732287422875228762287722878228792288022881228822288322884228852288622887228882288922890228912289222893228942289522896228972289822899229002290122902229032290422905229062290722908229092291022911229122291322914229152291622917229182291922920229212292222923229242292522926229272292822929229302293122932229332293422935229362293722938229392294022941229422294322944229452294622947229482294922950229512295222953229542295522956229572295822959229602296122962229632296422965229662296722968229692297022971229722297322974229752297622977229782297922980229812298222983229842298522986229872298822989229902299122992229932299422995229962299722998229992300023001230022300323004230052300623007230082300923010230112301223013230142301523016230172301823019230202302123022230232302423025230262302723028230292303023031230322303323034230352303623037230382303923040230412304223043230442304523046230472304823049230502305123052230532305423055230562305723058230592306023061230622306323064230652306623067230682306923070230712307223073230742307523076230772307823079230802308123082230832308423085230862308723088230892309023091230922309323094230952309623097230982309923100231012310223103231042310523106231072310823109231102311123112231132311423115231162311723118231192312023121231222312323124231252312623127231282312923130231312313223133231342313523136231372313823139231402314123142231432314423145231462314723148231492315023151231522315323154231552315623157231582315923160231612316223163231642316523166231672316823169231702317123172231732317423175231762317723178231792318023181231822318323184231852318623187231882318923190231912319223193231942319523196231972319823199232002320123202232032320423205232062320723208232092321023211232122321323214232152321623217232182321923220232212322223223232242322523226232272322823229232302323123232232332323423235232362323723238232392324023241232422324323244232452324623247232482324923250232512325223253232542325523256232572325823259232602326123262232632326423265232662326723268232692327023271232722327323274232752327623277232782327923280232812328223283232842328523286232872328823289232902329123292232932329423295232962329723298232992330023301233022330323304233052330623307233082330923310233112331223313233142331523316233172331823319233202332123322233232332423325233262332723328233292333023331233322333323334233352333623337233382333923340233412334223343233442334523346233472334823349233502335123352233532335423355233562335723358233592336023361233622336323364233652336623367233682336923370233712337223373233742337523376233772337823379233802338123382233832338423385233862338723388233892339023391233922339323394233952339623397233982339923400234012340223403234042340523406234072340823409234102341123412234132341423415234162341723418234192342023421234222342323424234252342623427234282342923430234312343223433234342343523436234372343823439234402344123442234432344423445234462344723448234492345023451234522345323454234552345623457234582345923460234612346223463234642346523466234672346823469234702347123472234732347423475234762347723478234792348023481234822348323484234852348623487234882348923490234912349223493234942349523496234972349823499235002350123502235032350423505235062350723508235092351023511235122351323514235152351623517235182351923520235212352223523235242352523526235272352823529235302353123532235332353423535235362353723538235392354023541235422354323544235452354623547235482354923550235512
355223553235542355523556235572355823559235602356123562235632356423565235662356723568235692357023571235722357323574235752357623577235782357923580235812358223583235842358523586235872358823589235902359123592235932359423595235962359723598235992360023601236022360323604236052360623607236082360923610236112361223613236142361523616236172361823619236202362123622236232362423625236262362723628236292363023631236322363323634236352363623637236382363923640236412364223643236442364523646236472364823649236502365123652236532365423655236562365723658236592366023661236622366323664236652366623667236682366923670236712367223673236742367523676236772367823679236802368123682236832368423685236862368723688236892369023691236922369323694236952369623697236982369923700237012370223703237042370523706237072370823709237102371123712237132371423715237162371723718237192372023721237222372323724237252372623727237282372923730237312373223733237342373523736237372373823739237402374123742237432374423745237462374723748237492375023751237522375323754237552375623757237582375923760237612376223763237642376523766237672376823769237702377123772237732377423775237762377723778237792378023781237822378323784237852378623787237882378923790237912379223793237942379523796237972379823799238002380123802238032380423805238062380723808238092381023811238122381323814238152381623817238182381923820238212382223823238242382523826238272382823829238302383123832238332383423835238362383723838238392384023841238422384323844238452384623847238482384923850238512385223853238542385523856238572385823859238602386123862238632386423865238662386723868238692387023871238722387323874238752387623877238782387923880238812388223883238842388523886238872388823889238902389123892238932389423895238962389723898238992390023901239022390323904239052390623907239082390923910239112391223913239142391523916239172391823919239202392123922239232392423925239262392723928239292393023931239322393323934239352393623937239382393923940239412394223943239442394523946239472394823949239502395123952239532395423955239562395723958239592396023961239622396323964239652396623967239682396923970239712397223973239742397523976239772397823979239802398123982239832398423985239862398723988239892399023991239922399323994239952399623997239982399924000240012400224003240042400524006240072400824009240102401124012240132401424015240162401724018240192402024021240222402324024240252402624027240282402924030240312403224033240342403524036240372403824039240402404124042240432404424045240462404724048240492405024051240522405324054240552405624057240582405924060240612406224063240642406524066240672406824069240702407124072240732407424075240762407724078240792408024081240822408324084240852408624087240882408924090240912409224093240942409524096240972409824099241002410124102241032410424105241062410724108241092411024111241122411324114241152411624117241182411924120241212412224123241242412524126241272412824129241302413124132241332413424135241362413724138241392414024141241422414324144241452414624147241482414924150241512415224153241542415524156241572415824159241602416124162241632416424165241662416724168241692417024171241722417324174241752417624177241782417924180241812418224183241842418524186241872418824189241902419124192241932419424195241962419724198241992420024201242022420324204242052420624207242082420924210242112421224213242142421524216242172421824219242202422124222242232422424225242262422724228242292423024231242322423324234242352423624237242382423924240242412424224243242442424524246242472424824249242502425124252242532425424255242562425724258242592426024261242622
  1. diff -Nur linux-4.4.62.orig/arch/arm/include/asm/switch_to.h linux-4.4.62/arch/arm/include/asm/switch_to.h
  2. --- linux-4.4.62.orig/arch/arm/include/asm/switch_to.h 2017-04-18 07:15:37.000000000 +0200
  3. +++ linux-4.4.62/arch/arm/include/asm/switch_to.h 2017-04-18 17:38:07.902637922 +0200
  4. @@ -3,6 +3,13 @@
  5. #include <linux/thread_info.h>
  6. +#if defined CONFIG_PREEMPT_RT_FULL && defined CONFIG_HIGHMEM
  7. +void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p);
  8. +#else
  9. +static inline void
  10. +switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { }
  11. +#endif
  12. +
  13. /*
  14. * For v7 SMP cores running a preemptible kernel we may be pre-empted
  15. * during a TLB maintenance operation, so execute an inner-shareable dsb
  16. @@ -25,6 +32,7 @@
  17. #define switch_to(prev,next,last) \
  18. do { \
  19. __complete_pending_tlbi(); \
  20. + switch_kmaps(prev, next); \
  21. last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \
  22. } while (0)
  23. diff -Nur linux-4.4.62.orig/arch/arm/include/asm/thread_info.h linux-4.4.62/arch/arm/include/asm/thread_info.h
  24. --- linux-4.4.62.orig/arch/arm/include/asm/thread_info.h 2017-04-18 07:15:37.000000000 +0200
  25. +++ linux-4.4.62/arch/arm/include/asm/thread_info.h 2017-04-18 17:38:07.902637922 +0200
  26. @@ -49,6 +49,7 @@
  27. struct thread_info {
  28. unsigned long flags; /* low level flags */
  29. int preempt_count; /* 0 => preemptable, <0 => bug */
  30. + int preempt_lazy_count; /* 0 => preemptable, <0 => bug */
  31. mm_segment_t addr_limit; /* address limit */
  32. struct task_struct *task; /* main task structure */
  33. __u32 cpu; /* cpu */
  34. @@ -142,7 +143,8 @@
  35. #define TIF_SYSCALL_TRACE 4 /* syscall trace active */
  36. #define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */
  37. #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */
  38. -#define TIF_SECCOMP 7 /* seccomp syscall filtering active */
  39. +#define TIF_SECCOMP 8 /* seccomp syscall filtering active */
  40. +#define TIF_NEED_RESCHED_LAZY 7
  41. #define TIF_NOHZ 12 /* in adaptive nohz mode */
  42. #define TIF_USING_IWMMXT 17
  43. @@ -152,6 +154,7 @@
  44. #define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
  45. #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
  46. #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
  47. +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
  48. #define _TIF_UPROBE (1 << TIF_UPROBE)
  49. #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
  50. #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
  51. @@ -167,7 +170,8 @@
  52. * Change these and you break ASM code in entry-common.S
  53. */
  54. #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
  55. - _TIF_NOTIFY_RESUME | _TIF_UPROBE)
  56. + _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
  57. + _TIF_NEED_RESCHED_LAZY)
  58. #endif /* __KERNEL__ */
  59. #endif /* __ASM_ARM_THREAD_INFO_H */
  60. diff -Nur linux-4.4.62.orig/arch/arm/Kconfig linux-4.4.62/arch/arm/Kconfig
  61. --- linux-4.4.62.orig/arch/arm/Kconfig 2017-04-18 07:15:37.000000000 +0200
  62. +++ linux-4.4.62/arch/arm/Kconfig 2017-04-18 17:38:07.902637922 +0200
  63. @@ -33,7 +33,7 @@
  64. select HARDIRQS_SW_RESEND
  65. select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT)
  66. select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
  67. - select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32
  68. + select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && !PREEMPT_RT_BASE
  69. select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32
  70. select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT)
  71. select HAVE_ARCH_TRACEHOOK
  72. @@ -68,6 +68,7 @@
  73. select HAVE_PERF_EVENTS
  74. select HAVE_PERF_REGS
  75. select HAVE_PERF_USER_STACK_DUMP
  76. + select HAVE_PREEMPT_LAZY
  77. select HAVE_RCU_TABLE_FREE if (SMP && ARM_LPAE)
  78. select HAVE_REGS_AND_STACK_ACCESS_API
  79. select HAVE_SYSCALL_TRACEPOINTS
  80. diff -Nur linux-4.4.62.orig/arch/arm/kernel/asm-offsets.c linux-4.4.62/arch/arm/kernel/asm-offsets.c
  81. --- linux-4.4.62.orig/arch/arm/kernel/asm-offsets.c 2017-04-18 07:15:37.000000000 +0200
  82. +++ linux-4.4.62/arch/arm/kernel/asm-offsets.c 2017-04-18 17:38:07.902637922 +0200
  83. @@ -65,6 +65,7 @@
  84. BLANK();
  85. DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
  86. DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
  87. + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count));
  88. DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit));
  89. DEFINE(TI_TASK, offsetof(struct thread_info, task));
  90. DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
  91. diff -Nur linux-4.4.62.orig/arch/arm/kernel/entry-armv.S linux-4.4.62/arch/arm/kernel/entry-armv.S
  92. --- linux-4.4.62.orig/arch/arm/kernel/entry-armv.S 2017-04-18 07:15:37.000000000 +0200
  93. +++ linux-4.4.62/arch/arm/kernel/entry-armv.S 2017-04-18 17:38:07.902637922 +0200
  94. @@ -215,11 +215,18 @@
  95. #ifdef CONFIG_PREEMPT
  96. get_thread_info tsk
  97. ldr r8, [tsk, #TI_PREEMPT] @ get preempt count
  98. - ldr r0, [tsk, #TI_FLAGS] @ get flags
  99. teq r8, #0 @ if preempt count != 0
  100. + bne 1f @ return from exeption
  101. + ldr r0, [tsk, #TI_FLAGS] @ get flags
  102. + tst r0, #_TIF_NEED_RESCHED @ if NEED_RESCHED is set
  103. + blne svc_preempt @ preempt!
  104. +
  105. + ldr r8, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count
  106. + teq r8, #0 @ if preempt lazy count != 0
  107. movne r0, #0 @ force flags to 0
  108. - tst r0, #_TIF_NEED_RESCHED
  109. + tst r0, #_TIF_NEED_RESCHED_LAZY
  110. blne svc_preempt
  111. +1:
  112. #endif
  113. svc_exit r5, irq = 1 @ return from exception
  114. @@ -234,8 +241,14 @@
  115. 1: bl preempt_schedule_irq @ irq en/disable is done inside
  116. ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS
  117. tst r0, #_TIF_NEED_RESCHED
  118. + bne 1b
  119. + tst r0, #_TIF_NEED_RESCHED_LAZY
  120. reteq r8 @ go again
  121. - b 1b
  122. + ldr r0, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count
  123. + teq r0, #0 @ if preempt lazy count != 0
  124. + beq 1b
  125. + ret r8 @ go again
  126. +
  127. #endif
  128. __und_fault:
  129. diff -Nur linux-4.4.62.orig/arch/arm/kernel/entry-common.S linux-4.4.62/arch/arm/kernel/entry-common.S
  130. --- linux-4.4.62.orig/arch/arm/kernel/entry-common.S 2017-04-18 07:15:37.000000000 +0200
  131. +++ linux-4.4.62/arch/arm/kernel/entry-common.S 2017-04-18 17:38:07.906638076 +0200
  132. @@ -36,7 +36,9 @@
  133. UNWIND(.cantunwind )
  134. disable_irq_notrace @ disable interrupts
  135. ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing
  136. - tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
  137. + tst r1, #((_TIF_SYSCALL_WORK | _TIF_WORK_MASK) & ~_TIF_SECCOMP)
  138. + bne fast_work_pending
  139. + tst r1, #_TIF_SECCOMP
  140. bne fast_work_pending
  141. /* perform architecture specific actions before user return */
  142. @@ -62,8 +64,11 @@
  143. str r0, [sp, #S_R0 + S_OFF]! @ save returned r0
  144. disable_irq_notrace @ disable interrupts
  145. ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing
  146. - tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
  147. + tst r1, #((_TIF_SYSCALL_WORK | _TIF_WORK_MASK) & ~_TIF_SECCOMP)
  148. + bne do_slower_path
  149. + tst r1, #_TIF_SECCOMP
  150. beq no_work_pending
  151. +do_slower_path:
  152. UNWIND(.fnend )
  153. ENDPROC(ret_fast_syscall)
  154. diff -Nur linux-4.4.62.orig/arch/arm/kernel/patch.c linux-4.4.62/arch/arm/kernel/patch.c
  155. --- linux-4.4.62.orig/arch/arm/kernel/patch.c 2017-04-18 07:15:37.000000000 +0200
  156. +++ linux-4.4.62/arch/arm/kernel/patch.c 2017-04-18 17:38:07.906638076 +0200
  157. @@ -15,7 +15,7 @@
  158. unsigned int insn;
  159. };
  160. -static DEFINE_SPINLOCK(patch_lock);
  161. +static DEFINE_RAW_SPINLOCK(patch_lock);
  162. static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags)
  163. __acquires(&patch_lock)
  164. @@ -32,7 +32,7 @@
  165. return addr;
  166. if (flags)
  167. - spin_lock_irqsave(&patch_lock, *flags);
  168. + raw_spin_lock_irqsave(&patch_lock, *flags);
  169. else
  170. __acquire(&patch_lock);
  171. @@ -47,7 +47,7 @@
  172. clear_fixmap(fixmap);
  173. if (flags)
  174. - spin_unlock_irqrestore(&patch_lock, *flags);
  175. + raw_spin_unlock_irqrestore(&patch_lock, *flags);
  176. else
  177. __release(&patch_lock);
  178. }
  179. diff -Nur linux-4.4.62.orig/arch/arm/kernel/process.c linux-4.4.62/arch/arm/kernel/process.c
  180. --- linux-4.4.62.orig/arch/arm/kernel/process.c 2017-04-18 07:15:37.000000000 +0200
  181. +++ linux-4.4.62/arch/arm/kernel/process.c 2017-04-18 17:38:07.906638076 +0200
  182. @@ -319,6 +319,30 @@
  183. }
  184. #ifdef CONFIG_MMU
  185. +/*
  186. + * CONFIG_SPLIT_PTLOCK_CPUS results in a page->ptl lock. If the lock is not
  187. + * initialized by pgtable_page_ctor() then a coredump of the vector page will
  188. + * fail.
  189. + */
  190. +static int __init vectors_user_mapping_init_page(void)
  191. +{
  192. + struct page *page;
  193. + unsigned long addr = 0xffff0000;
  194. + pgd_t *pgd;
  195. + pud_t *pud;
  196. + pmd_t *pmd;
  197. +
  198. + pgd = pgd_offset_k(addr);
  199. + pud = pud_offset(pgd, addr);
  200. + pmd = pmd_offset(pud, addr);
  201. + page = pmd_page(*(pmd));
  202. +
  203. + pgtable_page_ctor(page);
  204. +
  205. + return 0;
  206. +}
  207. +late_initcall(vectors_user_mapping_init_page);
  208. +
  209. #ifdef CONFIG_KUSER_HELPERS
  210. /*
  211. * The vectors page is always readable from user space for the
  212. diff -Nur linux-4.4.62.orig/arch/arm/kernel/signal.c linux-4.4.62/arch/arm/kernel/signal.c
  213. --- linux-4.4.62.orig/arch/arm/kernel/signal.c 2017-04-18 07:15:37.000000000 +0200
  214. +++ linux-4.4.62/arch/arm/kernel/signal.c 2017-04-18 17:38:07.906638076 +0200
  215. @@ -572,7 +572,8 @@
  216. */
  217. trace_hardirqs_off();
  218. do {
  219. - if (likely(thread_flags & _TIF_NEED_RESCHED)) {
  220. + if (likely(thread_flags & (_TIF_NEED_RESCHED |
  221. + _TIF_NEED_RESCHED_LAZY))) {
  222. schedule();
  223. } else {
  224. if (unlikely(!user_mode(regs)))
  225. diff -Nur linux-4.4.62.orig/arch/arm/kernel/smp.c linux-4.4.62/arch/arm/kernel/smp.c
  226. --- linux-4.4.62.orig/arch/arm/kernel/smp.c 2017-04-18 07:15:37.000000000 +0200
  227. +++ linux-4.4.62/arch/arm/kernel/smp.c 2017-04-18 17:38:07.906638076 +0200
  228. @@ -230,8 +230,6 @@
  229. flush_cache_louis();
  230. local_flush_tlb_all();
  231. - clear_tasks_mm_cpumask(cpu);
  232. -
  233. return 0;
  234. }
  235. @@ -247,6 +245,9 @@
  236. pr_err("CPU%u: cpu didn't die\n", cpu);
  237. return;
  238. }
  239. +
  240. + clear_tasks_mm_cpumask(cpu);
  241. +
  242. pr_notice("CPU%u: shutdown\n", cpu);
  243. /*
  244. diff -Nur linux-4.4.62.orig/arch/arm/kernel/unwind.c linux-4.4.62/arch/arm/kernel/unwind.c
  245. --- linux-4.4.62.orig/arch/arm/kernel/unwind.c 2017-04-18 07:15:37.000000000 +0200
  246. +++ linux-4.4.62/arch/arm/kernel/unwind.c 2017-04-18 17:38:07.906638076 +0200
  247. @@ -93,7 +93,7 @@
  248. static const struct unwind_idx *__origin_unwind_idx;
  249. extern const struct unwind_idx __stop_unwind_idx[];
  250. -static DEFINE_SPINLOCK(unwind_lock);
  251. +static DEFINE_RAW_SPINLOCK(unwind_lock);
  252. static LIST_HEAD(unwind_tables);
  253. /* Convert a prel31 symbol to an absolute address */
  254. @@ -201,7 +201,7 @@
  255. /* module unwind tables */
  256. struct unwind_table *table;
  257. - spin_lock_irqsave(&unwind_lock, flags);
  258. + raw_spin_lock_irqsave(&unwind_lock, flags);
  259. list_for_each_entry(table, &unwind_tables, list) {
  260. if (addr >= table->begin_addr &&
  261. addr < table->end_addr) {
  262. @@ -213,7 +213,7 @@
  263. break;
  264. }
  265. }
  266. - spin_unlock_irqrestore(&unwind_lock, flags);
  267. + raw_spin_unlock_irqrestore(&unwind_lock, flags);
  268. }
  269. pr_debug("%s: idx = %p\n", __func__, idx);
  270. @@ -529,9 +529,9 @@
  271. tab->begin_addr = text_addr;
  272. tab->end_addr = text_addr + text_size;
  273. - spin_lock_irqsave(&unwind_lock, flags);
  274. + raw_spin_lock_irqsave(&unwind_lock, flags);
  275. list_add_tail(&tab->list, &unwind_tables);
  276. - spin_unlock_irqrestore(&unwind_lock, flags);
  277. + raw_spin_unlock_irqrestore(&unwind_lock, flags);
  278. return tab;
  279. }
  280. @@ -543,9 +543,9 @@
  281. if (!tab)
  282. return;
  283. - spin_lock_irqsave(&unwind_lock, flags);
  284. + raw_spin_lock_irqsave(&unwind_lock, flags);
  285. list_del(&tab->list);
  286. - spin_unlock_irqrestore(&unwind_lock, flags);
  287. + raw_spin_unlock_irqrestore(&unwind_lock, flags);
  288. kfree(tab);
  289. }
  290. diff -Nur linux-4.4.62.orig/arch/arm/kvm/arm.c linux-4.4.62/arch/arm/kvm/arm.c
  291. --- linux-4.4.62.orig/arch/arm/kvm/arm.c 2017-04-18 07:15:37.000000000 +0200
  292. +++ linux-4.4.62/arch/arm/kvm/arm.c 2017-04-18 17:38:07.906638076 +0200
  293. @@ -496,18 +496,18 @@
  294. struct kvm_vcpu *vcpu;
  295. kvm_for_each_vcpu(i, vcpu, kvm) {
  296. - wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
  297. + struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
  298. vcpu->arch.pause = false;
  299. - wake_up_interruptible(wq);
  300. + swake_up(wq);
  301. }
  302. }
  303. static void vcpu_sleep(struct kvm_vcpu *vcpu)
  304. {
  305. - wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
  306. + struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
  307. - wait_event_interruptible(*wq, ((!vcpu->arch.power_off) &&
  308. + swait_event_interruptible(*wq, ((!vcpu->arch.power_off) &&
  309. (!vcpu->arch.pause)));
  310. }
  311. @@ -566,7 +566,7 @@
  312. * involves poking the GIC, which must be done in a
  313. * non-preemptible context.
  314. */
  315. - preempt_disable();
  316. + migrate_disable();
  317. kvm_timer_flush_hwstate(vcpu);
  318. kvm_vgic_flush_hwstate(vcpu);
  319. @@ -585,7 +585,7 @@
  320. local_irq_enable();
  321. kvm_timer_sync_hwstate(vcpu);
  322. kvm_vgic_sync_hwstate(vcpu);
  323. - preempt_enable();
  324. + migrate_enable();
  325. continue;
  326. }
  327. @@ -639,7 +639,7 @@
  328. kvm_vgic_sync_hwstate(vcpu);
  329. - preempt_enable();
  330. + migrate_enable();
  331. ret = handle_exit(vcpu, run, ret);
  332. }
  333. diff -Nur linux-4.4.62.orig/arch/arm/kvm/psci.c linux-4.4.62/arch/arm/kvm/psci.c
  334. --- linux-4.4.62.orig/arch/arm/kvm/psci.c 2017-04-18 07:15:37.000000000 +0200
  335. +++ linux-4.4.62/arch/arm/kvm/psci.c 2017-04-18 17:38:07.906638076 +0200
  336. @@ -70,7 +70,7 @@
  337. {
  338. struct kvm *kvm = source_vcpu->kvm;
  339. struct kvm_vcpu *vcpu = NULL;
  340. - wait_queue_head_t *wq;
  341. + struct swait_queue_head *wq;
  342. unsigned long cpu_id;
  343. unsigned long context_id;
  344. phys_addr_t target_pc;
  345. @@ -119,7 +119,7 @@
  346. smp_mb(); /* Make sure the above is visible */
  347. wq = kvm_arch_vcpu_wq(vcpu);
  348. - wake_up_interruptible(wq);
  349. + swake_up(wq);
  350. return PSCI_RET_SUCCESS;
  351. }
  352. diff -Nur linux-4.4.62.orig/arch/arm/mach-at91/at91rm9200.c linux-4.4.62/arch/arm/mach-at91/at91rm9200.c
  353. --- linux-4.4.62.orig/arch/arm/mach-at91/at91rm9200.c 2017-04-18 07:15:37.000000000 +0200
  354. +++ linux-4.4.62/arch/arm/mach-at91/at91rm9200.c 2017-04-18 17:38:07.906638076 +0200
  355. @@ -12,7 +12,6 @@
  356. #include <linux/of_platform.h>
  357. #include <asm/mach/arch.h>
  358. -#include <asm/system_misc.h>
  359. #include "generic.h"
  360. #include "soc.h"
  361. @@ -33,7 +32,6 @@
  362. of_platform_populate(NULL, of_default_bus_match_table, NULL, soc_dev);
  363. - arm_pm_idle = at91rm9200_idle;
  364. at91rm9200_pm_init();
  365. }
  366. diff -Nur linux-4.4.62.orig/arch/arm/mach-at91/at91sam9.c linux-4.4.62/arch/arm/mach-at91/at91sam9.c
  367. --- linux-4.4.62.orig/arch/arm/mach-at91/at91sam9.c 2017-04-18 07:15:37.000000000 +0200
  368. +++ linux-4.4.62/arch/arm/mach-at91/at91sam9.c 2017-04-18 17:38:07.906638076 +0200
  369. @@ -62,8 +62,6 @@
  370. soc_dev = soc_device_to_device(soc);
  371. of_platform_populate(NULL, of_default_bus_match_table, NULL, soc_dev);
  372. -
  373. - arm_pm_idle = at91sam9_idle;
  374. }
  375. static void __init at91sam9_dt_device_init(void)
  376. diff -Nur linux-4.4.62.orig/arch/arm/mach-at91/generic.h linux-4.4.62/arch/arm/mach-at91/generic.h
  377. --- linux-4.4.62.orig/arch/arm/mach-at91/generic.h 2017-04-18 07:15:37.000000000 +0200
  378. +++ linux-4.4.62/arch/arm/mach-at91/generic.h 2017-04-18 17:38:07.906638076 +0200
  379. @@ -11,27 +11,18 @@
  380. #ifndef _AT91_GENERIC_H
  381. #define _AT91_GENERIC_H
  382. -#include <linux/of.h>
  383. -#include <linux/reboot.h>
  384. -
  385. - /* Map io */
  386. -extern void __init at91_map_io(void);
  387. -extern void __init at91_alt_map_io(void);
  388. -
  389. -/* idle */
  390. -extern void at91rm9200_idle(void);
  391. -extern void at91sam9_idle(void);
  392. -
  393. #ifdef CONFIG_PM
  394. extern void __init at91rm9200_pm_init(void);
  395. extern void __init at91sam9260_pm_init(void);
  396. extern void __init at91sam9g45_pm_init(void);
  397. extern void __init at91sam9x5_pm_init(void);
  398. +extern void __init sama5_pm_init(void);
  399. #else
  400. static inline void __init at91rm9200_pm_init(void) { }
  401. static inline void __init at91sam9260_pm_init(void) { }
  402. static inline void __init at91sam9g45_pm_init(void) { }
  403. static inline void __init at91sam9x5_pm_init(void) { }
  404. +static inline void __init sama5_pm_init(void) { }
  405. #endif
  406. #endif /* _AT91_GENERIC_H */
  407. diff -Nur linux-4.4.62.orig/arch/arm/mach-at91/Kconfig linux-4.4.62/arch/arm/mach-at91/Kconfig
  408. --- linux-4.4.62.orig/arch/arm/mach-at91/Kconfig 2017-04-18 07:15:37.000000000 +0200
  409. +++ linux-4.4.62/arch/arm/mach-at91/Kconfig 2017-04-18 17:38:07.906638076 +0200
  410. @@ -99,6 +99,7 @@
  411. config COMMON_CLK_AT91
  412. bool
  413. select COMMON_CLK
  414. + select MFD_SYSCON
  415. config HAVE_AT91_SMD
  416. bool
  417. diff -Nur linux-4.4.62.orig/arch/arm/mach-at91/pm.c linux-4.4.62/arch/arm/mach-at91/pm.c
  418. --- linux-4.4.62.orig/arch/arm/mach-at91/pm.c 2017-04-18 07:15:37.000000000 +0200
  419. +++ linux-4.4.62/arch/arm/mach-at91/pm.c 2017-04-18 17:38:07.906638076 +0200
  420. @@ -31,10 +31,13 @@
  421. #include <asm/mach/irq.h>
  422. #include <asm/fncpy.h>
  423. #include <asm/cacheflush.h>
  424. +#include <asm/system_misc.h>
  425. #include "generic.h"
  426. #include "pm.h"
  427. +static void __iomem *pmc;
  428. +
  429. /*
  430. * FIXME: this is needed to communicate between the pinctrl driver and
  431. * the PM implementation in the machine. Possibly part of the PM
  432. @@ -87,7 +90,7 @@
  433. unsigned long scsr;
  434. int i;
  435. - scsr = at91_pmc_read(AT91_PMC_SCSR);
  436. + scsr = readl(pmc + AT91_PMC_SCSR);
  437. /* USB must not be using PLLB */
  438. if ((scsr & at91_pm_data.uhp_udp_mask) != 0) {
  439. @@ -101,8 +104,7 @@
  440. if ((scsr & (AT91_PMC_PCK0 << i)) == 0)
  441. continue;
  442. -
  443. - css = at91_pmc_read(AT91_PMC_PCKR(i)) & AT91_PMC_CSS;
  444. + css = readl(pmc + AT91_PMC_PCKR(i)) & AT91_PMC_CSS;
  445. if (css != AT91_PMC_CSS_SLOW) {
  446. pr_err("AT91: PM - Suspend-to-RAM with PCK%d src %d\n", i, css);
  447. return 0;
  448. @@ -145,8 +147,8 @@
  449. flush_cache_all();
  450. outer_disable();
  451. - at91_suspend_sram_fn(at91_pmc_base, at91_ramc_base[0],
  452. - at91_ramc_base[1], pm_data);
  453. + at91_suspend_sram_fn(pmc, at91_ramc_base[0],
  454. + at91_ramc_base[1], pm_data);
  455. outer_resume();
  456. }
  457. @@ -369,6 +371,21 @@
  458. at91_pm_set_standby(standby);
  459. }
  460. +void at91rm9200_idle(void)
  461. +{
  462. + /*
  463. + * Disable the processor clock. The processor will be automatically
  464. + * re-enabled by an interrupt or by a reset.
  465. + */
  466. + writel(AT91_PMC_PCK, pmc + AT91_PMC_SCDR);
  467. +}
  468. +
  469. +void at91sam9_idle(void)
  470. +{
  471. + writel(AT91_PMC_PCK, pmc + AT91_PMC_SCDR);
  472. + cpu_do_idle();
  473. +}
  474. +
  475. static void __init at91_pm_sram_init(void)
  476. {
  477. struct gen_pool *sram_pool;
  478. @@ -415,13 +432,36 @@
  479. &at91_pm_suspend_in_sram, at91_pm_suspend_in_sram_sz);
  480. }
  481. -static void __init at91_pm_init(void)
  482. +static const struct of_device_id atmel_pmc_ids[] __initconst = {
  483. + { .compatible = "atmel,at91rm9200-pmc" },
  484. + { .compatible = "atmel,at91sam9260-pmc" },
  485. + { .compatible = "atmel,at91sam9g45-pmc" },
  486. + { .compatible = "atmel,at91sam9n12-pmc" },
  487. + { .compatible = "atmel,at91sam9x5-pmc" },
  488. + { .compatible = "atmel,sama5d3-pmc" },
  489. + { .compatible = "atmel,sama5d2-pmc" },
  490. + { /* sentinel */ },
  491. +};
  492. +
  493. +static void __init at91_pm_init(void (*pm_idle)(void))
  494. {
  495. - at91_pm_sram_init();
  496. + struct device_node *pmc_np;
  497. if (at91_cpuidle_device.dev.platform_data)
  498. platform_device_register(&at91_cpuidle_device);
  499. + pmc_np = of_find_matching_node(NULL, atmel_pmc_ids);
  500. + pmc = of_iomap(pmc_np, 0);
  501. + if (!pmc) {
  502. + pr_err("AT91: PM not supported, PMC not found\n");
  503. + return;
  504. + }
  505. +
  506. + if (pm_idle)
  507. + arm_pm_idle = pm_idle;
  508. +
  509. + at91_pm_sram_init();
  510. +
  511. if (at91_suspend_sram_fn)
  512. suspend_set_ops(&at91_pm_ops);
  513. else
  514. @@ -440,7 +480,7 @@
  515. at91_pm_data.uhp_udp_mask = AT91RM9200_PMC_UHP | AT91RM9200_PMC_UDP;
  516. at91_pm_data.memctrl = AT91_MEMCTRL_MC;
  517. - at91_pm_init();
  518. + at91_pm_init(at91rm9200_idle);
  519. }
  520. void __init at91sam9260_pm_init(void)
  521. @@ -448,7 +488,7 @@
  522. at91_dt_ramc();
  523. at91_pm_data.memctrl = AT91_MEMCTRL_SDRAMC;
  524. at91_pm_data.uhp_udp_mask = AT91SAM926x_PMC_UHP | AT91SAM926x_PMC_UDP;
  525. - return at91_pm_init();
  526. + at91_pm_init(at91sam9_idle);
  527. }
  528. void __init at91sam9g45_pm_init(void)
  529. @@ -456,7 +496,7 @@
  530. at91_dt_ramc();
  531. at91_pm_data.uhp_udp_mask = AT91SAM926x_PMC_UHP;
  532. at91_pm_data.memctrl = AT91_MEMCTRL_DDRSDR;
  533. - return at91_pm_init();
  534. + at91_pm_init(at91sam9_idle);
  535. }
  536. void __init at91sam9x5_pm_init(void)
  537. @@ -464,5 +504,13 @@
  538. at91_dt_ramc();
  539. at91_pm_data.uhp_udp_mask = AT91SAM926x_PMC_UHP | AT91SAM926x_PMC_UDP;
  540. at91_pm_data.memctrl = AT91_MEMCTRL_DDRSDR;
  541. - return at91_pm_init();
  542. + at91_pm_init(at91sam9_idle);
  543. +}
  544. +
  545. +void __init sama5_pm_init(void)
  546. +{
  547. + at91_dt_ramc();
  548. + at91_pm_data.uhp_udp_mask = AT91SAM926x_PMC_UHP | AT91SAM926x_PMC_UDP;
  549. + at91_pm_data.memctrl = AT91_MEMCTRL_DDRSDR;
  550. + at91_pm_init(NULL);
  551. }
  552. diff -Nur linux-4.4.62.orig/arch/arm/mach-at91/sama5.c linux-4.4.62/arch/arm/mach-at91/sama5.c
  553. --- linux-4.4.62.orig/arch/arm/mach-at91/sama5.c 2017-04-18 07:15:37.000000000 +0200
  554. +++ linux-4.4.62/arch/arm/mach-at91/sama5.c 2017-04-18 17:38:07.906638076 +0200
  555. @@ -51,7 +51,7 @@
  556. soc_dev = soc_device_to_device(soc);
  557. of_platform_populate(NULL, of_default_bus_match_table, NULL, soc_dev);
  558. - at91sam9x5_pm_init();
  559. + sama5_pm_init();
  560. }
  561. static const char *const sama5_dt_board_compat[] __initconst = {
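
Note on the AT91 hunks above: at91_pm_init() no longer relies on the exported at91_pmc_base. It now looks the PMC up by its device-tree compatible, ioremaps it into the local pmc pointer, and installs the SoC-specific idle callback through arm_pm_idle; sama5_pm_init() passes NULL, leaving arm_pm_idle untouched. A minimal sketch of the lookup-and-map pattern used here (at91_pm_map_pmc is an illustrative name, not a function in the patch):

	#include <linux/init.h>
	#include <linux/io.h>
	#include <linux/of.h>
	#include <linux/of_address.h>

	static void __iomem *pmc;

	static const struct of_device_id atmel_pmc_ids[] __initconst = {
		{ .compatible = "atmel,at91rm9200-pmc" },
		{ /* sentinel */ },
	};

	static int __init at91_pm_map_pmc(void)
	{
		struct device_node *pmc_np;

		pmc_np = of_find_matching_node(NULL, atmel_pmc_ids);
		pmc = of_iomap(pmc_np, 0);	/* map register bank 0 of the PMC node */
		return pmc ? 0 : -ENODEV;
	}
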
  562. diff -Nur linux-4.4.62.orig/arch/arm/mach-exynos/platsmp.c linux-4.4.62/arch/arm/mach-exynos/platsmp.c
  563. --- linux-4.4.62.orig/arch/arm/mach-exynos/platsmp.c 2017-04-18 07:15:37.000000000 +0200
  564. +++ linux-4.4.62/arch/arm/mach-exynos/platsmp.c 2017-04-18 17:38:07.906638076 +0200
  565. @@ -230,7 +230,7 @@
  566. return (void __iomem *)(S5P_VA_SCU);
  567. }
  568. -static DEFINE_SPINLOCK(boot_lock);
  569. +static DEFINE_RAW_SPINLOCK(boot_lock);
  570. static void exynos_secondary_init(unsigned int cpu)
  571. {
  572. @@ -243,8 +243,8 @@
  573. /*
  574. * Synchronise with the boot thread.
  575. */
  576. - spin_lock(&boot_lock);
  577. - spin_unlock(&boot_lock);
  578. + raw_spin_lock(&boot_lock);
  579. + raw_spin_unlock(&boot_lock);
  580. }
  581. int exynos_set_boot_addr(u32 core_id, unsigned long boot_addr)
  582. @@ -308,7 +308,7 @@
  583. * Set synchronisation state between this boot processor
  584. * and the secondary one
  585. */
  586. - spin_lock(&boot_lock);
  587. + raw_spin_lock(&boot_lock);
  588. /*
  589. * The secondary processor is waiting to be released from
  590. @@ -335,7 +335,7 @@
  591. if (timeout == 0) {
  592. printk(KERN_ERR "cpu1 power enable failed");
  593. - spin_unlock(&boot_lock);
  594. + raw_spin_unlock(&boot_lock);
  595. return -ETIMEDOUT;
  596. }
  597. }
  598. @@ -381,7 +381,7 @@
  599. * calibrations, then wait for it to finish
  600. */
  601. fail:
  602. - spin_unlock(&boot_lock);
  603. + raw_spin_unlock(&boot_lock);
  604. return pen_release != -1 ? ret : 0;
  605. }
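
The boot_lock conversion above is repeated for every platsmp/mcpm file that follows (hisi, omap2, prima2, qcom, spear, sti, plat-versatile): on PREEMPT_RT_FULL a spinlock_t becomes a sleeping lock, but the secondary-CPU bring-up and tear-down paths run with interrupts disabled and must not sleep, so the lock is switched to a raw spinlock, which keeps its non-sleeping semantics on RT. A minimal sketch of the resulting pattern:

	#include <linux/spinlock.h>

	static DEFINE_RAW_SPINLOCK(boot_lock);

	static void secondary_init_sync(unsigned int cpu)
	{
		/* Synchronise with the boot CPU; raw_spin_lock never sleeps,
		 * even with PREEMPT_RT_FULL enabled. */
		raw_spin_lock(&boot_lock);
		raw_spin_unlock(&boot_lock);
	}
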
  606. diff -Nur linux-4.4.62.orig/arch/arm/mach-hisi/platmcpm.c linux-4.4.62/arch/arm/mach-hisi/platmcpm.c
  607. --- linux-4.4.62.orig/arch/arm/mach-hisi/platmcpm.c 2017-04-18 07:15:37.000000000 +0200
  608. +++ linux-4.4.62/arch/arm/mach-hisi/platmcpm.c 2017-04-18 17:38:07.906638076 +0200
  609. @@ -61,7 +61,7 @@
  610. static void __iomem *sysctrl, *fabric;
  611. static int hip04_cpu_table[HIP04_MAX_CLUSTERS][HIP04_MAX_CPUS_PER_CLUSTER];
  612. -static DEFINE_SPINLOCK(boot_lock);
  613. +static DEFINE_RAW_SPINLOCK(boot_lock);
  614. static u32 fabric_phys_addr;
  615. /*
  616. * [0]: bootwrapper physical address
  617. @@ -113,7 +113,7 @@
  618. if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER)
  619. return -EINVAL;
  620. - spin_lock_irq(&boot_lock);
  621. + raw_spin_lock_irq(&boot_lock);
  622. if (hip04_cpu_table[cluster][cpu])
  623. goto out;
  624. @@ -147,7 +147,7 @@
  625. out:
  626. hip04_cpu_table[cluster][cpu]++;
  627. - spin_unlock_irq(&boot_lock);
  628. + raw_spin_unlock_irq(&boot_lock);
  629. return 0;
  630. }
  631. @@ -162,11 +162,11 @@
  632. cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
  633. cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
  634. - spin_lock(&boot_lock);
  635. + raw_spin_lock(&boot_lock);
  636. hip04_cpu_table[cluster][cpu]--;
  637. if (hip04_cpu_table[cluster][cpu] == 1) {
  638. /* A power_up request went ahead of us. */
  639. - spin_unlock(&boot_lock);
  640. + raw_spin_unlock(&boot_lock);
  641. return;
  642. } else if (hip04_cpu_table[cluster][cpu] > 1) {
  643. pr_err("Cluster %d CPU%d boots multiple times\n", cluster, cpu);
  644. @@ -174,7 +174,7 @@
  645. }
  646. last_man = hip04_cluster_is_down(cluster);
  647. - spin_unlock(&boot_lock);
  648. + raw_spin_unlock(&boot_lock);
  649. if (last_man) {
  650. /* Since it's Cortex A15, disable L2 prefetching. */
  651. asm volatile(
  652. @@ -203,7 +203,7 @@
  653. cpu >= HIP04_MAX_CPUS_PER_CLUSTER);
  654. count = TIMEOUT_MSEC / POLL_MSEC;
  655. - spin_lock_irq(&boot_lock);
  656. + raw_spin_lock_irq(&boot_lock);
  657. for (tries = 0; tries < count; tries++) {
  658. if (hip04_cpu_table[cluster][cpu])
  659. goto err;
  660. @@ -211,10 +211,10 @@
  661. data = readl_relaxed(sysctrl + SC_CPU_RESET_STATUS(cluster));
  662. if (data & CORE_WFI_STATUS(cpu))
  663. break;
  664. - spin_unlock_irq(&boot_lock);
  665. + raw_spin_unlock_irq(&boot_lock);
  666. /* Wait for clean L2 when the whole cluster is down. */
  667. msleep(POLL_MSEC);
  668. - spin_lock_irq(&boot_lock);
  669. + raw_spin_lock_irq(&boot_lock);
  670. }
  671. if (tries >= count)
  672. goto err;
  673. @@ -231,10 +231,10 @@
  674. goto err;
  675. if (hip04_cluster_is_down(cluster))
  676. hip04_set_snoop_filter(cluster, 0);
  677. - spin_unlock_irq(&boot_lock);
  678. + raw_spin_unlock_irq(&boot_lock);
  679. return 1;
  680. err:
  681. - spin_unlock_irq(&boot_lock);
  682. + raw_spin_unlock_irq(&boot_lock);
  683. return 0;
  684. }
  685. #endif
  686. diff -Nur linux-4.4.62.orig/arch/arm/mach-imx/Kconfig linux-4.4.62/arch/arm/mach-imx/Kconfig
  687. --- linux-4.4.62.orig/arch/arm/mach-imx/Kconfig 2017-04-18 07:15:37.000000000 +0200
  688. +++ linux-4.4.62/arch/arm/mach-imx/Kconfig 2017-04-18 17:38:07.906638076 +0200
  689. @@ -524,7 +524,7 @@
  690. bool "i.MX6 Quad/DualLite support"
  691. select ARM_ERRATA_764369 if SMP
  692. select HAVE_ARM_SCU if SMP
  693. - select HAVE_ARM_TWD if SMP
  694. + select HAVE_ARM_TWD
  695. select PCI_DOMAINS if PCI
  696. select PINCTRL_IMX6Q
  697. select SOC_IMX6
  698. diff -Nur linux-4.4.62.orig/arch/arm/mach-omap2/omap-smp.c linux-4.4.62/arch/arm/mach-omap2/omap-smp.c
  699. --- linux-4.4.62.orig/arch/arm/mach-omap2/omap-smp.c 2017-04-18 07:15:37.000000000 +0200
  700. +++ linux-4.4.62/arch/arm/mach-omap2/omap-smp.c 2017-04-18 17:38:07.906638076 +0200
  701. @@ -43,7 +43,7 @@
  702. /* SCU base address */
  703. static void __iomem *scu_base;
  704. -static DEFINE_SPINLOCK(boot_lock);
  705. +static DEFINE_RAW_SPINLOCK(boot_lock);
  706. void __iomem *omap4_get_scu_base(void)
  707. {
  708. @@ -74,8 +74,8 @@
  709. /*
  710. * Synchronise with the boot thread.
  711. */
  712. - spin_lock(&boot_lock);
  713. - spin_unlock(&boot_lock);
  714. + raw_spin_lock(&boot_lock);
  715. + raw_spin_unlock(&boot_lock);
  716. }
  717. static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle)
  718. @@ -89,7 +89,7 @@
  719. * Set synchronisation state between this boot processor
  720. * and the secondary one
  721. */
  722. - spin_lock(&boot_lock);
  723. + raw_spin_lock(&boot_lock);
  724. /*
  725. * Update the AuxCoreBoot0 with boot state for secondary core.
  726. @@ -166,7 +166,7 @@
  727. * Now the secondary core is starting up let it run its
  728. * calibrations, then wait for it to finish
  729. */
  730. - spin_unlock(&boot_lock);
  731. + raw_spin_unlock(&boot_lock);
  732. return 0;
  733. }
  734. diff -Nur linux-4.4.62.orig/arch/arm/mach-prima2/platsmp.c linux-4.4.62/arch/arm/mach-prima2/platsmp.c
  735. --- linux-4.4.62.orig/arch/arm/mach-prima2/platsmp.c 2017-04-18 07:15:37.000000000 +0200
  736. +++ linux-4.4.62/arch/arm/mach-prima2/platsmp.c 2017-04-18 17:38:07.906638076 +0200
  737. @@ -22,7 +22,7 @@
  738. static void __iomem *clk_base;
  739. -static DEFINE_SPINLOCK(boot_lock);
  740. +static DEFINE_RAW_SPINLOCK(boot_lock);
  741. static void sirfsoc_secondary_init(unsigned int cpu)
  742. {
  743. @@ -36,8 +36,8 @@
  744. /*
  745. * Synchronise with the boot thread.
  746. */
  747. - spin_lock(&boot_lock);
  748. - spin_unlock(&boot_lock);
  749. + raw_spin_lock(&boot_lock);
  750. + raw_spin_unlock(&boot_lock);
  751. }
  752. static const struct of_device_id clk_ids[] = {
  753. @@ -75,7 +75,7 @@
  754. /* make sure write buffer is drained */
  755. mb();
  756. - spin_lock(&boot_lock);
  757. + raw_spin_lock(&boot_lock);
  758. /*
  759. * The secondary processor is waiting to be released from
  760. @@ -107,7 +107,7 @@
  761. * now the secondary core is starting up let it run its
  762. * calibrations, then wait for it to finish
  763. */
  764. - spin_unlock(&boot_lock);
  765. + raw_spin_unlock(&boot_lock);
  766. return pen_release != -1 ? -ENOSYS : 0;
  767. }
  768. diff -Nur linux-4.4.62.orig/arch/arm/mach-qcom/platsmp.c linux-4.4.62/arch/arm/mach-qcom/platsmp.c
  769. --- linux-4.4.62.orig/arch/arm/mach-qcom/platsmp.c 2017-04-18 07:15:37.000000000 +0200
  770. +++ linux-4.4.62/arch/arm/mach-qcom/platsmp.c 2017-04-18 17:38:07.906638076 +0200
  771. @@ -46,7 +46,7 @@
  772. extern void secondary_startup_arm(void);
  773. -static DEFINE_SPINLOCK(boot_lock);
  774. +static DEFINE_RAW_SPINLOCK(boot_lock);
  775. #ifdef CONFIG_HOTPLUG_CPU
  776. static void qcom_cpu_die(unsigned int cpu)
  777. @@ -60,8 +60,8 @@
  778. /*
  779. * Synchronise with the boot thread.
  780. */
  781. - spin_lock(&boot_lock);
  782. - spin_unlock(&boot_lock);
  783. + raw_spin_lock(&boot_lock);
  784. + raw_spin_unlock(&boot_lock);
  785. }
  786. static int scss_release_secondary(unsigned int cpu)
  787. @@ -284,7 +284,7 @@
  788. * set synchronisation state between this boot processor
  789. * and the secondary one
  790. */
  791. - spin_lock(&boot_lock);
  792. + raw_spin_lock(&boot_lock);
  793. /*
  794. * Send the secondary CPU a soft interrupt, thereby causing
  795. @@ -297,7 +297,7 @@
  796. * now the secondary core is starting up let it run its
  797. * calibrations, then wait for it to finish
  798. */
  799. - spin_unlock(&boot_lock);
  800. + raw_spin_unlock(&boot_lock);
  801. return ret;
  802. }
  803. diff -Nur linux-4.4.62.orig/arch/arm/mach-spear/platsmp.c linux-4.4.62/arch/arm/mach-spear/platsmp.c
  804. --- linux-4.4.62.orig/arch/arm/mach-spear/platsmp.c 2017-04-18 07:15:37.000000000 +0200
  805. +++ linux-4.4.62/arch/arm/mach-spear/platsmp.c 2017-04-18 17:38:07.906638076 +0200
  806. @@ -32,7 +32,7 @@
  807. sync_cache_w(&pen_release);
  808. }
  809. -static DEFINE_SPINLOCK(boot_lock);
  810. +static DEFINE_RAW_SPINLOCK(boot_lock);
  811. static void __iomem *scu_base = IOMEM(VA_SCU_BASE);
  812. @@ -47,8 +47,8 @@
  813. /*
  814. * Synchronise with the boot thread.
  815. */
  816. - spin_lock(&boot_lock);
  817. - spin_unlock(&boot_lock);
  818. + raw_spin_lock(&boot_lock);
  819. + raw_spin_unlock(&boot_lock);
  820. }
  821. static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle)
  822. @@ -59,7 +59,7 @@
  823. * set synchronisation state between this boot processor
  824. * and the secondary one
  825. */
  826. - spin_lock(&boot_lock);
  827. + raw_spin_lock(&boot_lock);
  828. /*
  829. * The secondary processor is waiting to be released from
  830. @@ -84,7 +84,7 @@
  831. * now the secondary core is starting up let it run its
  832. * calibrations, then wait for it to finish
  833. */
  834. - spin_unlock(&boot_lock);
  835. + raw_spin_unlock(&boot_lock);
  836. return pen_release != -1 ? -ENOSYS : 0;
  837. }
  838. diff -Nur linux-4.4.62.orig/arch/arm/mach-sti/platsmp.c linux-4.4.62/arch/arm/mach-sti/platsmp.c
  839. --- linux-4.4.62.orig/arch/arm/mach-sti/platsmp.c 2017-04-18 07:15:37.000000000 +0200
  840. +++ linux-4.4.62/arch/arm/mach-sti/platsmp.c 2017-04-18 17:38:07.906638076 +0200
  841. @@ -35,7 +35,7 @@
  842. sync_cache_w(&pen_release);
  843. }
  844. -static DEFINE_SPINLOCK(boot_lock);
  845. +static DEFINE_RAW_SPINLOCK(boot_lock);
  846. static void sti_secondary_init(unsigned int cpu)
  847. {
  848. @@ -48,8 +48,8 @@
  849. /*
  850. * Synchronise with the boot thread.
  851. */
  852. - spin_lock(&boot_lock);
  853. - spin_unlock(&boot_lock);
  854. + raw_spin_lock(&boot_lock);
  855. + raw_spin_unlock(&boot_lock);
  856. }
  857. static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle)
  858. @@ -60,7 +60,7 @@
  859. * set synchronisation state between this boot processor
  860. * and the secondary one
  861. */
  862. - spin_lock(&boot_lock);
  863. + raw_spin_lock(&boot_lock);
  864. /*
  865. * The secondary processor is waiting to be released from
  866. @@ -91,7 +91,7 @@
  867. * now the secondary core is starting up let it run its
  868. * calibrations, then wait for it to finish
  869. */
  870. - spin_unlock(&boot_lock);
  871. + raw_spin_unlock(&boot_lock);
  872. return pen_release != -1 ? -ENOSYS : 0;
  873. }
  874. diff -Nur linux-4.4.62.orig/arch/arm/mm/fault.c linux-4.4.62/arch/arm/mm/fault.c
  875. --- linux-4.4.62.orig/arch/arm/mm/fault.c 2017-04-18 07:15:37.000000000 +0200
  876. +++ linux-4.4.62/arch/arm/mm/fault.c 2017-04-18 17:38:07.906638076 +0200
  877. @@ -430,6 +430,9 @@
  878. if (addr < TASK_SIZE)
  879. return do_page_fault(addr, fsr, regs);
  880. + if (interrupts_enabled(regs))
  881. + local_irq_enable();
  882. +
  883. if (user_mode(regs))
  884. goto bad_area;
  885. @@ -497,6 +500,9 @@
  886. static int
  887. do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
  888. {
  889. + if (interrupts_enabled(regs))
  890. + local_irq_enable();
  891. +
  892. do_bad_area(addr, fsr, regs);
  893. return 0;
  894. }
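
The two fault.c hunks above re-enable interrupts when the faulting context had them enabled, presumably so that the do_bad_area()/signal-delivery path reached from these handlers may take locks that sleep on PREEMPT_RT; the guard mirrors what do_page_fault() already does. Condensed from the hunk above:

	static int
	do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
	{
		/* Re-enable IRQs if the interrupted context had them on, before
		 * entering a path that may need to sleep on PREEMPT_RT. */
		if (interrupts_enabled(regs))
			local_irq_enable();

		do_bad_area(addr, fsr, regs);
		return 0;
	}
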
  895. diff -Nur linux-4.4.62.orig/arch/arm/mm/highmem.c linux-4.4.62/arch/arm/mm/highmem.c
  896. --- linux-4.4.62.orig/arch/arm/mm/highmem.c 2017-04-18 07:15:37.000000000 +0200
  897. +++ linux-4.4.62/arch/arm/mm/highmem.c 2017-04-18 17:38:07.906638076 +0200
  898. @@ -34,6 +34,11 @@
  899. return *ptep;
  900. }
  901. +static unsigned int fixmap_idx(int type)
  902. +{
  903. + return FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id();
  904. +}
  905. +
  906. void *kmap(struct page *page)
  907. {
  908. might_sleep();
  909. @@ -54,12 +59,13 @@
  910. void *kmap_atomic(struct page *page)
  911. {
  912. + pte_t pte = mk_pte(page, kmap_prot);
  913. unsigned int idx;
  914. unsigned long vaddr;
  915. void *kmap;
  916. int type;
  917. - preempt_disable();
  918. + preempt_disable_nort();
  919. pagefault_disable();
  920. if (!PageHighMem(page))
  921. return page_address(page);
  922. @@ -79,7 +85,7 @@
  923. type = kmap_atomic_idx_push();
  924. - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id();
  925. + idx = fixmap_idx(type);
  926. vaddr = __fix_to_virt(idx);
  927. #ifdef CONFIG_DEBUG_HIGHMEM
  928. /*
  929. @@ -93,7 +99,10 @@
  930. * in place, so the contained TLB flush ensures the TLB is updated
  931. * with the new mapping.
  932. */
  933. - set_fixmap_pte(idx, mk_pte(page, kmap_prot));
  934. +#ifdef CONFIG_PREEMPT_RT_FULL
  935. + current->kmap_pte[type] = pte;
  936. +#endif
  937. + set_fixmap_pte(idx, pte);
  938. return (void *)vaddr;
  939. }
  940. @@ -106,44 +115,75 @@
  941. if (kvaddr >= (void *)FIXADDR_START) {
  942. type = kmap_atomic_idx();
  943. - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id();
  944. + idx = fixmap_idx(type);
  945. if (cache_is_vivt())
  946. __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE);
  947. +#ifdef CONFIG_PREEMPT_RT_FULL
  948. + current->kmap_pte[type] = __pte(0);
  949. +#endif
  950. #ifdef CONFIG_DEBUG_HIGHMEM
  951. BUG_ON(vaddr != __fix_to_virt(idx));
  952. - set_fixmap_pte(idx, __pte(0));
  953. #else
  954. (void) idx; /* to kill a warning */
  955. #endif
  956. + set_fixmap_pte(idx, __pte(0));
  957. kmap_atomic_idx_pop();
  958. } else if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) {
  959. /* this address was obtained through kmap_high_get() */
  960. kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)]));
  961. }
  962. pagefault_enable();
  963. - preempt_enable();
  964. + preempt_enable_nort();
  965. }
  966. EXPORT_SYMBOL(__kunmap_atomic);
  967. void *kmap_atomic_pfn(unsigned long pfn)
  968. {
  969. + pte_t pte = pfn_pte(pfn, kmap_prot);
  970. unsigned long vaddr;
  971. int idx, type;
  972. struct page *page = pfn_to_page(pfn);
  973. - preempt_disable();
  974. + preempt_disable_nort();
  975. pagefault_disable();
  976. if (!PageHighMem(page))
  977. return page_address(page);
  978. type = kmap_atomic_idx_push();
  979. - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id();
  980. + idx = fixmap_idx(type);
  981. vaddr = __fix_to_virt(idx);
  982. #ifdef CONFIG_DEBUG_HIGHMEM
  983. BUG_ON(!pte_none(get_fixmap_pte(vaddr)));
  984. #endif
  985. - set_fixmap_pte(idx, pfn_pte(pfn, kmap_prot));
  986. +#ifdef CONFIG_PREEMPT_RT_FULL
  987. + current->kmap_pte[type] = pte;
  988. +#endif
  989. + set_fixmap_pte(idx, pte);
  990. return (void *)vaddr;
  991. }
  992. +#if defined CONFIG_PREEMPT_RT_FULL
  993. +void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p)
  994. +{
  995. + int i;
  996. +
  997. + /*
  998. + * Clear @prev's kmap_atomic mappings
  999. + */
  1000. + for (i = 0; i < prev_p->kmap_idx; i++) {
  1001. + int idx = fixmap_idx(i);
  1002. +
  1003. + set_fixmap_pte(idx, __pte(0));
  1004. + }
  1005. + /*
  1006. + * Restore @next_p's kmap_atomic mappings
  1007. + */
  1008. + for (i = 0; i < next_p->kmap_idx; i++) {
  1009. + int idx = fixmap_idx(i);
  1010. +
  1011. + if (!pte_none(next_p->kmap_pte[i]))
  1012. + set_fixmap_pte(idx, next_p->kmap_pte[i]);
  1013. + }
  1014. +}
  1015. +#endif
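
With preempt_disable_nort(), kmap_atomic() no longer disables preemption on PREEMPT_RT_FULL, so a task can be scheduled out while it still owns an atomic kmap slot. The highmem.c hunk therefore records each mapping's pte in current->kmap_pte[] and adds switch_kmaps(), which clears the previous task's fixmap slots and re-installs the next task's. The hook that calls switch_kmaps() lives in the arch context-switch path, which is not part of this excerpt; a sketch of such a caller (arch_switch_kmaps is an illustrative name only):

	static inline void arch_switch_kmaps(struct task_struct *prev,
					     struct task_struct *next)
	{
	#ifdef CONFIG_PREEMPT_RT_FULL
		/* Re-point the per-CPU fixmap slots at @next's saved kmap ptes. */
		switch_kmaps(prev, next);
	#endif
	}
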
  1016. diff -Nur linux-4.4.62.orig/arch/arm/plat-versatile/platsmp.c linux-4.4.62/arch/arm/plat-versatile/platsmp.c
  1017. --- linux-4.4.62.orig/arch/arm/plat-versatile/platsmp.c 2017-04-18 07:15:37.000000000 +0200
  1018. +++ linux-4.4.62/arch/arm/plat-versatile/platsmp.c 2017-04-18 17:38:07.906638076 +0200
  1019. @@ -30,7 +30,7 @@
  1020. sync_cache_w(&pen_release);
  1021. }
  1022. -static DEFINE_SPINLOCK(boot_lock);
  1023. +static DEFINE_RAW_SPINLOCK(boot_lock);
  1024. void versatile_secondary_init(unsigned int cpu)
  1025. {
  1026. @@ -43,8 +43,8 @@
  1027. /*
  1028. * Synchronise with the boot thread.
  1029. */
  1030. - spin_lock(&boot_lock);
  1031. - spin_unlock(&boot_lock);
  1032. + raw_spin_lock(&boot_lock);
  1033. + raw_spin_unlock(&boot_lock);
  1034. }
  1035. int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle)
  1036. @@ -55,7 +55,7 @@
  1037. * Set synchronisation state between this boot processor
  1038. * and the secondary one
  1039. */
  1040. - spin_lock(&boot_lock);
  1041. + raw_spin_lock(&boot_lock);
  1042. /*
  1043. * This is really belt and braces; we hold unintended secondary
  1044. @@ -85,7 +85,7 @@
  1045. * now the secondary core is starting up let it run its
  1046. * calibrations, then wait for it to finish
  1047. */
  1048. - spin_unlock(&boot_lock);
  1049. + raw_spin_unlock(&boot_lock);
  1050. return pen_release != -1 ? -ENOSYS : 0;
  1051. }
  1052. diff -Nur linux-4.4.62.orig/arch/arm64/include/asm/thread_info.h linux-4.4.62/arch/arm64/include/asm/thread_info.h
  1053. --- linux-4.4.62.orig/arch/arm64/include/asm/thread_info.h 2017-04-18 07:15:37.000000000 +0200
  1054. +++ linux-4.4.62/arch/arm64/include/asm/thread_info.h 2017-04-18 17:38:07.906638076 +0200
  1055. @@ -49,6 +49,7 @@
  1056. mm_segment_t addr_limit; /* address limit */
  1057. struct task_struct *task; /* main task structure */
  1058. int preempt_count; /* 0 => preemptable, <0 => bug */
  1059. + int preempt_lazy_count; /* 0 => preemptable, <0 => bug */
  1060. int cpu; /* cpu */
  1061. };
  1062. @@ -103,6 +104,7 @@
  1063. #define TIF_NEED_RESCHED 1
  1064. #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
  1065. #define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */
  1066. +#define TIF_NEED_RESCHED_LAZY 4
  1067. #define TIF_NOHZ 7
  1068. #define TIF_SYSCALL_TRACE 8
  1069. #define TIF_SYSCALL_AUDIT 9
  1070. @@ -118,6 +120,7 @@
  1071. #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
  1072. #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
  1073. #define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE)
  1074. +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
  1075. #define _TIF_NOHZ (1 << TIF_NOHZ)
  1076. #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
  1077. #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
  1078. @@ -126,7 +129,8 @@
  1079. #define _TIF_32BIT (1 << TIF_32BIT)
  1080. #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
  1081. - _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE)
  1082. + _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
  1083. + _TIF_NEED_RESCHED_LAZY)
  1084. #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
  1085. _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
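
The preempt_lazy_count field and TIF_NEED_RESCHED_LAZY flag added above implement lazy preemption: requests that must take effect immediately keep using TIF_NEED_RESCHED, while requests that can wait until the next preemption point set only the lazy flag. The arm64 entry.S hunk further down encodes the combined test in assembly; rendered in C it is roughly (sketch only, assumes the usual thread_info/bitops headers):

	static bool need_irq_preemption(struct thread_info *ti)
	{
		if (ti->preempt_count)			/* preemption disabled */
			return false;
		if (test_bit(TIF_NEED_RESCHED, &ti->flags))
			return true;			/* hard request: preempt now */
		if (ti->preempt_lazy_count)		/* lazy preemption disabled */
			return false;
		return test_bit(TIF_NEED_RESCHED_LAZY, &ti->flags);
	}
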
  1086. diff -Nur linux-4.4.62.orig/arch/arm64/Kconfig linux-4.4.62/arch/arm64/Kconfig
  1087. --- linux-4.4.62.orig/arch/arm64/Kconfig 2017-04-18 07:15:37.000000000 +0200
  1088. +++ linux-4.4.62/arch/arm64/Kconfig 2017-04-18 17:38:07.906638076 +0200
  1089. @@ -76,6 +76,7 @@
  1090. select HAVE_PERF_REGS
  1091. select HAVE_PERF_USER_STACK_DUMP
  1092. select HAVE_RCU_TABLE_FREE
  1093. + select HAVE_PREEMPT_LAZY
  1094. select HAVE_SYSCALL_TRACEPOINTS
  1095. select IOMMU_DMA if IOMMU_SUPPORT
  1096. select IRQ_DOMAIN
  1097. @@ -582,7 +583,7 @@
  1098. config XEN
  1099. bool "Xen guest support on ARM64"
  1100. - depends on ARM64 && OF
  1101. + depends on ARM64 && OF && !PREEMPT_RT_FULL
  1102. select SWIOTLB_XEN
  1103. help
  1104. Say Y if you want to run Linux in a Virtual Machine on Xen on ARM64.
  1105. diff -Nur linux-4.4.62.orig/arch/arm64/kernel/asm-offsets.c linux-4.4.62/arch/arm64/kernel/asm-offsets.c
  1106. --- linux-4.4.62.orig/arch/arm64/kernel/asm-offsets.c 2017-04-18 07:15:37.000000000 +0200
  1107. +++ linux-4.4.62/arch/arm64/kernel/asm-offsets.c 2017-04-18 17:38:07.906638076 +0200
  1108. @@ -35,6 +35,7 @@
  1109. BLANK();
  1110. DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
  1111. DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
  1112. + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count));
  1113. DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit));
  1114. DEFINE(TI_TASK, offsetof(struct thread_info, task));
  1115. DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
  1116. diff -Nur linux-4.4.62.orig/arch/arm64/kernel/entry.S linux-4.4.62/arch/arm64/kernel/entry.S
  1117. --- linux-4.4.62.orig/arch/arm64/kernel/entry.S 2017-04-18 07:15:37.000000000 +0200
  1118. +++ linux-4.4.62/arch/arm64/kernel/entry.S 2017-04-18 17:38:07.910638232 +0200
  1119. @@ -376,11 +376,16 @@
  1120. #ifdef CONFIG_PREEMPT
  1121. get_thread_info tsk
  1122. ldr w24, [tsk, #TI_PREEMPT] // get preempt count
  1123. - cbnz w24, 1f // preempt count != 0
  1124. + cbnz w24, 2f // preempt count != 0
  1125. ldr x0, [tsk, #TI_FLAGS] // get flags
  1126. - tbz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling?
  1127. - bl el1_preempt
  1128. + tbnz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling?
  1129. +
  1130. + ldr w24, [tsk, #TI_PREEMPT_LAZY] // get preempt lazy count
  1131. + cbnz w24, 2f // preempt lazy count != 0
  1132. + tbz x0, #TIF_NEED_RESCHED_LAZY, 2f // needs rescheduling?
  1133. 1:
  1134. + bl el1_preempt
  1135. +2:
  1136. #endif
  1137. #ifdef CONFIG_TRACE_IRQFLAGS
  1138. bl trace_hardirqs_on
  1139. @@ -394,6 +399,7 @@
  1140. 1: bl preempt_schedule_irq // irq en/disable is done inside
  1141. ldr x0, [tsk, #TI_FLAGS] // get new tasks TI_FLAGS
  1142. tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling?
  1143. + tbnz x0, #TIF_NEED_RESCHED_LAZY, 1b // needs rescheduling?
  1144. ret x24
  1145. #endif
  1146. @@ -638,6 +644,7 @@
  1147. */
  1148. work_pending:
  1149. tbnz x1, #TIF_NEED_RESCHED, work_resched
  1150. + tbnz x1, #TIF_NEED_RESCHED_LAZY, work_resched
  1151. /* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */
  1152. ldr x2, [sp, #S_PSTATE]
  1153. mov x0, sp // 'regs'
  1154. diff -Nur linux-4.4.62.orig/arch/Kconfig linux-4.4.62/arch/Kconfig
  1155. --- linux-4.4.62.orig/arch/Kconfig 2017-04-18 07:15:37.000000000 +0200
  1156. +++ linux-4.4.62/arch/Kconfig 2017-04-18 17:38:07.902637922 +0200
  1157. @@ -9,6 +9,7 @@
  1158. tristate "OProfile system profiling"
  1159. depends on PROFILING
  1160. depends on HAVE_OPROFILE
  1161. + depends on !PREEMPT_RT_FULL
  1162. select RING_BUFFER
  1163. select RING_BUFFER_ALLOW_SWAP
  1164. help
  1165. @@ -52,6 +53,7 @@
  1166. config JUMP_LABEL
  1167. bool "Optimize very unlikely/likely branches"
  1168. depends on HAVE_ARCH_JUMP_LABEL
  1169. + depends on (!INTERRUPT_OFF_HIST && !PREEMPT_OFF_HIST && !WAKEUP_LATENCY_HIST && !MISSED_TIMER_OFFSETS_HIST)
  1170. help
  1171. This option enables a transparent branch optimization that
  1172. makes certain almost-always-true or almost-always-false branch
  1173. diff -Nur linux-4.4.62.orig/arch/mips/Kconfig linux-4.4.62/arch/mips/Kconfig
  1174. --- linux-4.4.62.orig/arch/mips/Kconfig 2017-04-18 07:15:37.000000000 +0200
  1175. +++ linux-4.4.62/arch/mips/Kconfig 2017-04-18 17:38:08.026642730 +0200
  1176. @@ -2411,7 +2411,7 @@
  1177. #
  1178. config HIGHMEM
  1179. bool "High Memory Support"
  1180. - depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA
  1181. + depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA && !PREEMPT_RT_FULL
  1182. config CPU_SUPPORTS_HIGHMEM
  1183. bool
  1184. diff -Nur linux-4.4.62.orig/arch/mips/kvm/mips.c linux-4.4.62/arch/mips/kvm/mips.c
  1185. --- linux-4.4.62.orig/arch/mips/kvm/mips.c 2017-04-18 07:15:37.000000000 +0200
  1186. +++ linux-4.4.62/arch/mips/kvm/mips.c 2017-04-18 17:38:08.026642730 +0200
  1187. @@ -454,8 +454,8 @@
  1188. dvcpu->arch.wait = 0;
  1189. - if (waitqueue_active(&dvcpu->wq))
  1190. - wake_up_interruptible(&dvcpu->wq);
  1191. + if (swait_active(&dvcpu->wq))
  1192. + swake_up(&dvcpu->wq);
  1193. return 0;
  1194. }
  1195. @@ -1183,8 +1183,8 @@
  1196. kvm_mips_callbacks->queue_timer_int(vcpu);
  1197. vcpu->arch.wait = 0;
  1198. - if (waitqueue_active(&vcpu->wq))
  1199. - wake_up_interruptible(&vcpu->wq);
  1200. + if (swait_active(&vcpu->wq))
  1201. + swake_up(&vcpu->wq);
  1202. }
  1203. /* low level hrtimer wake routine */
  1204. diff -Nur linux-4.4.62.orig/arch/powerpc/include/asm/kvm_host.h linux-4.4.62/arch/powerpc/include/asm/kvm_host.h
  1205. --- linux-4.4.62.orig/arch/powerpc/include/asm/kvm_host.h 2017-04-18 07:15:37.000000000 +0200
  1206. +++ linux-4.4.62/arch/powerpc/include/asm/kvm_host.h 2017-04-18 17:38:08.026642730 +0200
  1207. @@ -286,7 +286,7 @@
  1208. struct list_head runnable_threads;
  1209. struct list_head preempt_list;
  1210. spinlock_t lock;
  1211. - wait_queue_head_t wq;
  1212. + struct swait_queue_head wq;
  1213. spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */
  1214. u64 stolen_tb;
  1215. u64 preempt_tb;
  1216. @@ -627,7 +627,7 @@
  1217. u8 prodded;
  1218. u32 last_inst;
  1219. - wait_queue_head_t *wqp;
  1220. + struct swait_queue_head *wqp;
  1221. struct kvmppc_vcore *vcore;
  1222. int ret;
  1223. int trap;
  1224. diff -Nur linux-4.4.62.orig/arch/powerpc/include/asm/thread_info.h linux-4.4.62/arch/powerpc/include/asm/thread_info.h
  1225. --- linux-4.4.62.orig/arch/powerpc/include/asm/thread_info.h 2017-04-18 07:15:37.000000000 +0200
  1226. +++ linux-4.4.62/arch/powerpc/include/asm/thread_info.h 2017-04-18 17:38:08.026642730 +0200
  1227. @@ -42,6 +42,8 @@
  1228. int cpu; /* cpu we're on */
  1229. int preempt_count; /* 0 => preemptable,
  1230. <0 => BUG */
  1231. + int preempt_lazy_count; /* 0 => preemptable,
  1232. + <0 => BUG */
  1233. unsigned long local_flags; /* private flags for thread */
  1234. /* low level flags - has atomic operations done on it */
  1235. @@ -82,8 +84,7 @@
  1236. #define TIF_SYSCALL_TRACE 0 /* syscall trace active */
  1237. #define TIF_SIGPENDING 1 /* signal pending */
  1238. #define TIF_NEED_RESCHED 2 /* rescheduling necessary */
  1239. -#define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling
  1240. - TIF_NEED_RESCHED */
  1241. +#define TIF_NEED_RESCHED_LAZY 3 /* lazy rescheduling necessary */
  1242. #define TIF_32BIT 4 /* 32 bit binary */
  1243. #define TIF_RESTORE_TM 5 /* need to restore TM FP/VEC/VSX */
  1244. #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
  1245. @@ -101,6 +102,8 @@
  1246. #if defined(CONFIG_PPC64)
  1247. #define TIF_ELF2ABI 18 /* function descriptors must die! */
  1248. #endif
  1249. +#define TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling
  1250. + TIF_NEED_RESCHED */
  1251. /* as above, but as bit values */
  1252. #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
  1253. @@ -119,14 +122,16 @@
  1254. #define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
  1255. #define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE)
  1256. #define _TIF_NOHZ (1<<TIF_NOHZ)
  1257. +#define _TIF_NEED_RESCHED_LAZY (1<<TIF_NEED_RESCHED_LAZY)
  1258. #define _TIF_SYSCALL_DOTRACE (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
  1259. _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \
  1260. _TIF_NOHZ)
  1261. #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
  1262. _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
  1263. - _TIF_RESTORE_TM)
  1264. + _TIF_RESTORE_TM | _TIF_NEED_RESCHED_LAZY)
  1265. #define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR)
  1266. +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
  1267. /* Bits in local_flags */
  1268. /* Don't move TLF_NAPPING without adjusting the code in entry_32.S */
  1269. diff -Nur linux-4.4.62.orig/arch/powerpc/Kconfig linux-4.4.62/arch/powerpc/Kconfig
  1270. --- linux-4.4.62.orig/arch/powerpc/Kconfig 2017-04-18 07:15:37.000000000 +0200
  1271. +++ linux-4.4.62/arch/powerpc/Kconfig 2017-04-18 17:38:08.026642730 +0200
  1272. @@ -60,10 +60,11 @@
  1273. config RWSEM_GENERIC_SPINLOCK
  1274. bool
  1275. + default y if PREEMPT_RT_FULL
  1276. config RWSEM_XCHGADD_ALGORITHM
  1277. bool
  1278. - default y
  1279. + default y if !PREEMPT_RT_FULL
  1280. config GENERIC_LOCKBREAK
  1281. bool
  1282. @@ -141,6 +142,7 @@
  1283. select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
  1284. select GENERIC_STRNCPY_FROM_USER
  1285. select GENERIC_STRNLEN_USER
  1286. + select HAVE_PREEMPT_LAZY
  1287. select HAVE_MOD_ARCH_SPECIFIC
  1288. select MODULES_USE_ELF_RELA
  1289. select CLONE_BACKWARDS
  1290. @@ -319,7 +321,7 @@
  1291. config HIGHMEM
  1292. bool "High memory support"
  1293. - depends on PPC32
  1294. + depends on PPC32 && !PREEMPT_RT_FULL
  1295. source kernel/Kconfig.hz
  1296. source kernel/Kconfig.preempt
  1297. diff -Nur linux-4.4.62.orig/arch/powerpc/kernel/asm-offsets.c linux-4.4.62/arch/powerpc/kernel/asm-offsets.c
  1298. --- linux-4.4.62.orig/arch/powerpc/kernel/asm-offsets.c 2017-04-18 07:15:37.000000000 +0200
  1299. +++ linux-4.4.62/arch/powerpc/kernel/asm-offsets.c 2017-04-18 17:38:08.026642730 +0200
  1300. @@ -160,6 +160,7 @@
  1301. DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
  1302. DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags));
  1303. DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
  1304. + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count));
  1305. DEFINE(TI_TASK, offsetof(struct thread_info, task));
  1306. DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
  1307. diff -Nur linux-4.4.62.orig/arch/powerpc/kernel/entry_32.S linux-4.4.62/arch/powerpc/kernel/entry_32.S
  1308. --- linux-4.4.62.orig/arch/powerpc/kernel/entry_32.S 2017-04-18 07:15:37.000000000 +0200
  1309. +++ linux-4.4.62/arch/powerpc/kernel/entry_32.S 2017-04-18 17:38:08.030642885 +0200
  1310. @@ -818,7 +818,14 @@
  1311. cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
  1312. bne restore
  1313. andi. r8,r8,_TIF_NEED_RESCHED
  1314. + bne+ 1f
  1315. + lwz r0,TI_PREEMPT_LAZY(r9)
  1316. + cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
  1317. + bne restore
  1318. + lwz r0,TI_FLAGS(r9)
  1319. + andi. r0,r0,_TIF_NEED_RESCHED_LAZY
  1320. beq+ restore
  1321. +1:
  1322. lwz r3,_MSR(r1)
  1323. andi. r0,r3,MSR_EE /* interrupts off? */
  1324. beq restore /* don't schedule if so */
  1325. @@ -829,11 +836,11 @@
  1326. */
  1327. bl trace_hardirqs_off
  1328. #endif
  1329. -1: bl preempt_schedule_irq
  1330. +2: bl preempt_schedule_irq
  1331. CURRENT_THREAD_INFO(r9, r1)
  1332. lwz r3,TI_FLAGS(r9)
  1333. - andi. r0,r3,_TIF_NEED_RESCHED
  1334. - bne- 1b
  1335. + andi. r0,r3,_TIF_NEED_RESCHED_MASK
  1336. + bne- 2b
  1337. #ifdef CONFIG_TRACE_IRQFLAGS
  1338. /* And now, to properly rebalance the above, we tell lockdep they
  1339. * are being turned back on, which will happen when we return
  1340. @@ -1154,7 +1161,7 @@
  1341. #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */
  1342. do_work: /* r10 contains MSR_KERNEL here */
  1343. - andi. r0,r9,_TIF_NEED_RESCHED
  1344. + andi. r0,r9,_TIF_NEED_RESCHED_MASK
  1345. beq do_user_signal
  1346. do_resched: /* r10 contains MSR_KERNEL here */
  1347. @@ -1175,7 +1182,7 @@
  1348. MTMSRD(r10) /* disable interrupts */
  1349. CURRENT_THREAD_INFO(r9, r1)
  1350. lwz r9,TI_FLAGS(r9)
  1351. - andi. r0,r9,_TIF_NEED_RESCHED
  1352. + andi. r0,r9,_TIF_NEED_RESCHED_MASK
  1353. bne- do_resched
  1354. andi. r0,r9,_TIF_USER_WORK_MASK
  1355. beq restore_user
  1356. diff -Nur linux-4.4.62.orig/arch/powerpc/kernel/entry_64.S linux-4.4.62/arch/powerpc/kernel/entry_64.S
  1357. --- linux-4.4.62.orig/arch/powerpc/kernel/entry_64.S 2017-04-18 07:15:37.000000000 +0200
  1358. +++ linux-4.4.62/arch/powerpc/kernel/entry_64.S 2017-04-18 17:38:08.030642885 +0200
  1359. @@ -683,7 +683,7 @@
  1360. #else
  1361. beq restore
  1362. #endif
  1363. -1: andi. r0,r4,_TIF_NEED_RESCHED
  1364. +1: andi. r0,r4,_TIF_NEED_RESCHED_MASK
  1365. beq 2f
  1366. bl restore_interrupts
  1367. SCHEDULE_USER
  1368. @@ -745,10 +745,18 @@
  1369. #ifdef CONFIG_PREEMPT
  1370. /* Check if we need to preempt */
  1371. + lwz r8,TI_PREEMPT(r9)
  1372. + cmpwi 0,r8,0 /* if non-zero, just restore regs and return */
  1373. + bne restore
  1374. andi. r0,r4,_TIF_NEED_RESCHED
  1375. + bne+ check_count
  1376. +
  1377. + andi. r0,r4,_TIF_NEED_RESCHED_LAZY
  1378. beq+ restore
  1379. + lwz r8,TI_PREEMPT_LAZY(r9)
  1380. +
  1381. /* Check that preempt_count() == 0 and interrupts are enabled */
  1382. - lwz r8,TI_PREEMPT(r9)
  1383. +check_count:
  1384. cmpwi cr1,r8,0
  1385. ld r0,SOFTE(r1)
  1386. cmpdi r0,0
  1387. @@ -765,7 +773,7 @@
  1388. /* Re-test flags and eventually loop */
  1389. CURRENT_THREAD_INFO(r9, r1)
  1390. ld r4,TI_FLAGS(r9)
  1391. - andi. r0,r4,_TIF_NEED_RESCHED
  1392. + andi. r0,r4,_TIF_NEED_RESCHED_MASK
  1393. bne 1b
  1394. /*
  1395. diff -Nur linux-4.4.62.orig/arch/powerpc/kernel/irq.c linux-4.4.62/arch/powerpc/kernel/irq.c
  1396. --- linux-4.4.62.orig/arch/powerpc/kernel/irq.c 2017-04-18 07:15:37.000000000 +0200
  1397. +++ linux-4.4.62/arch/powerpc/kernel/irq.c 2017-04-18 17:38:08.030642885 +0200
  1398. @@ -614,6 +614,7 @@
  1399. }
  1400. }
  1401. +#ifndef CONFIG_PREEMPT_RT_FULL
  1402. void do_softirq_own_stack(void)
  1403. {
  1404. struct thread_info *curtp, *irqtp;
  1405. @@ -631,6 +632,7 @@
  1406. if (irqtp->flags)
  1407. set_bits(irqtp->flags, &curtp->flags);
  1408. }
  1409. +#endif
  1410. irq_hw_number_t virq_to_hw(unsigned int virq)
  1411. {
  1412. diff -Nur linux-4.4.62.orig/arch/powerpc/kernel/misc_32.S linux-4.4.62/arch/powerpc/kernel/misc_32.S
  1413. --- linux-4.4.62.orig/arch/powerpc/kernel/misc_32.S 2017-04-18 07:15:37.000000000 +0200
  1414. +++ linux-4.4.62/arch/powerpc/kernel/misc_32.S 2017-04-18 17:38:08.030642885 +0200
  1415. @@ -40,6 +40,7 @@
  1416. * We store the saved ksp_limit in the unused part
  1417. * of the STACK_FRAME_OVERHEAD
  1418. */
  1419. +#ifndef CONFIG_PREEMPT_RT_FULL
  1420. _GLOBAL(call_do_softirq)
  1421. mflr r0
  1422. stw r0,4(r1)
  1423. @@ -56,6 +57,7 @@
  1424. stw r10,THREAD+KSP_LIMIT(r2)
  1425. mtlr r0
  1426. blr
  1427. +#endif
  1428. /*
  1429. * void call_do_irq(struct pt_regs *regs, struct thread_info *irqtp);
  1430. diff -Nur linux-4.4.62.orig/arch/powerpc/kernel/misc_64.S linux-4.4.62/arch/powerpc/kernel/misc_64.S
  1431. --- linux-4.4.62.orig/arch/powerpc/kernel/misc_64.S 2017-04-18 07:15:37.000000000 +0200
  1432. +++ linux-4.4.62/arch/powerpc/kernel/misc_64.S 2017-04-18 17:38:08.030642885 +0200
  1433. @@ -30,6 +30,7 @@
  1434. .text
  1435. +#ifndef CONFIG_PREEMPT_RT_FULL
  1436. _GLOBAL(call_do_softirq)
  1437. mflr r0
  1438. std r0,16(r1)
  1439. @@ -40,6 +41,7 @@
  1440. ld r0,16(r1)
  1441. mtlr r0
  1442. blr
  1443. +#endif
  1444. _GLOBAL(call_do_irq)
  1445. mflr r0
  1446. diff -Nur linux-4.4.62.orig/arch/powerpc/kvm/book3s_hv.c linux-4.4.62/arch/powerpc/kvm/book3s_hv.c
  1447. --- linux-4.4.62.orig/arch/powerpc/kvm/book3s_hv.c 2017-04-18 07:15:37.000000000 +0200
  1448. +++ linux-4.4.62/arch/powerpc/kvm/book3s_hv.c 2017-04-18 17:38:08.030642885 +0200
  1449. @@ -114,11 +114,11 @@
  1450. static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
  1451. {
  1452. int cpu;
  1453. - wait_queue_head_t *wqp;
  1454. + struct swait_queue_head *wqp;
  1455. wqp = kvm_arch_vcpu_wq(vcpu);
  1456. - if (waitqueue_active(wqp)) {
  1457. - wake_up_interruptible(wqp);
  1458. + if (swait_active(wqp)) {
  1459. + swake_up(wqp);
  1460. ++vcpu->stat.halt_wakeup;
  1461. }
  1462. @@ -707,8 +707,8 @@
  1463. tvcpu->arch.prodded = 1;
  1464. smp_mb();
  1465. if (vcpu->arch.ceded) {
  1466. - if (waitqueue_active(&vcpu->wq)) {
  1467. - wake_up_interruptible(&vcpu->wq);
  1468. + if (swait_active(&vcpu->wq)) {
  1469. + swake_up(&vcpu->wq);
  1470. vcpu->stat.halt_wakeup++;
  1471. }
  1472. }
  1473. @@ -1453,7 +1453,7 @@
  1474. INIT_LIST_HEAD(&vcore->runnable_threads);
  1475. spin_lock_init(&vcore->lock);
  1476. spin_lock_init(&vcore->stoltb_lock);
  1477. - init_waitqueue_head(&vcore->wq);
  1478. + init_swait_queue_head(&vcore->wq);
  1479. vcore->preempt_tb = TB_NIL;
  1480. vcore->lpcr = kvm->arch.lpcr;
  1481. vcore->first_vcpuid = core * threads_per_subcore;
  1482. @@ -2525,10 +2525,9 @@
  1483. {
  1484. struct kvm_vcpu *vcpu;
  1485. int do_sleep = 1;
  1486. + DECLARE_SWAITQUEUE(wait);
  1487. - DEFINE_WAIT(wait);
  1488. -
  1489. - prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
  1490. + prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
  1491. /*
  1492. * Check one last time for pending exceptions and ceded state after
  1493. @@ -2542,7 +2541,7 @@
  1494. }
  1495. if (!do_sleep) {
  1496. - finish_wait(&vc->wq, &wait);
  1497. + finish_swait(&vc->wq, &wait);
  1498. return;
  1499. }
  1500. @@ -2550,7 +2549,7 @@
  1501. trace_kvmppc_vcore_blocked(vc, 0);
  1502. spin_unlock(&vc->lock);
  1503. schedule();
  1504. - finish_wait(&vc->wq, &wait);
  1505. + finish_swait(&vc->wq, &wait);
  1506. spin_lock(&vc->lock);
  1507. vc->vcore_state = VCORE_INACTIVE;
  1508. trace_kvmppc_vcore_blocked(vc, 1);
  1509. @@ -2606,7 +2605,7 @@
  1510. kvmppc_start_thread(vcpu, vc);
  1511. trace_kvm_guest_enter(vcpu);
  1512. } else if (vc->vcore_state == VCORE_SLEEPING) {
  1513. - wake_up(&vc->wq);
  1514. + swake_up(&vc->wq);
  1515. }
  1516. }
  1517. diff -Nur linux-4.4.62.orig/arch/powerpc/kvm/Kconfig linux-4.4.62/arch/powerpc/kvm/Kconfig
  1518. --- linux-4.4.62.orig/arch/powerpc/kvm/Kconfig 2017-04-18 07:15:37.000000000 +0200
  1519. +++ linux-4.4.62/arch/powerpc/kvm/Kconfig 2017-04-18 17:38:08.030642885 +0200
  1520. @@ -172,6 +172,7 @@
  1521. config KVM_MPIC
  1522. bool "KVM in-kernel MPIC emulation"
  1523. depends on KVM && E500
  1524. + depends on !PREEMPT_RT_FULL
  1525. select HAVE_KVM_IRQCHIP
  1526. select HAVE_KVM_IRQFD
  1527. select HAVE_KVM_IRQ_ROUTING
  1528. diff -Nur linux-4.4.62.orig/arch/powerpc/platforms/ps3/device-init.c linux-4.4.62/arch/powerpc/platforms/ps3/device-init.c
  1529. --- linux-4.4.62.orig/arch/powerpc/platforms/ps3/device-init.c 2017-04-18 07:15:37.000000000 +0200
  1530. +++ linux-4.4.62/arch/powerpc/platforms/ps3/device-init.c 2017-04-18 17:38:08.030642885 +0200
  1531. @@ -752,7 +752,7 @@
  1532. }
  1533. pr_debug("%s:%u: notification %s issued\n", __func__, __LINE__, op);
  1534. - res = wait_event_interruptible(dev->done.wait,
  1535. + res = swait_event_interruptible(dev->done.wait,
  1536. dev->done.done || kthread_should_stop());
  1537. if (kthread_should_stop())
  1538. res = -EINTR;
  1539. diff -Nur linux-4.4.62.orig/arch/s390/include/asm/kvm_host.h linux-4.4.62/arch/s390/include/asm/kvm_host.h
  1540. --- linux-4.4.62.orig/arch/s390/include/asm/kvm_host.h 2017-04-18 07:15:37.000000000 +0200
  1541. +++ linux-4.4.62/arch/s390/include/asm/kvm_host.h 2017-04-18 17:38:08.030642885 +0200
  1542. @@ -427,7 +427,7 @@
  1543. struct kvm_s390_local_interrupt {
  1544. spinlock_t lock;
  1545. struct kvm_s390_float_interrupt *float_int;
  1546. - wait_queue_head_t *wq;
  1547. + struct swait_queue_head *wq;
  1548. atomic_t *cpuflags;
  1549. DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS);
  1550. struct kvm_s390_irq_payload irq;
  1551. diff -Nur linux-4.4.62.orig/arch/s390/kvm/interrupt.c linux-4.4.62/arch/s390/kvm/interrupt.c
  1552. --- linux-4.4.62.orig/arch/s390/kvm/interrupt.c 2017-04-18 07:15:37.000000000 +0200
  1553. +++ linux-4.4.62/arch/s390/kvm/interrupt.c 2017-04-18 17:38:08.030642885 +0200
  1554. @@ -868,13 +868,13 @@
  1555. void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
  1556. {
  1557. - if (waitqueue_active(&vcpu->wq)) {
  1558. + if (swait_active(&vcpu->wq)) {
  1559. /*
  1560. * The vcpu gave up the cpu voluntarily, mark it as a good
  1561. * yield-candidate.
  1562. */
  1563. vcpu->preempted = true;
  1564. - wake_up_interruptible(&vcpu->wq);
  1565. + swake_up(&vcpu->wq);
  1566. vcpu->stat.halt_wakeup++;
  1567. }
  1568. }
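
The KVM (mips, powerpc book3s_hv, s390) and ps3 hunks above replace ordinary wait queues with simple wait queues. A swait queue head is protected by a raw spinlock and swake_up() does a bounded amount of work, so the vcpu wake-up paths, which may run from hard interrupt context, remain legal on PREEMPT_RT_FULL. A minimal sketch of the API as used above (the real callers recheck their condition under their own locking; headers <linux/swait.h> and <linux/sched.h> assumed):

	static struct swait_queue_head wq;	/* init_swait_queue_head(&wq) at init time */
	static bool done;

	static void waiter(void)
	{
		DECLARE_SWAITQUEUE(wait);

		prepare_to_swait(&wq, &wait, TASK_INTERRUPTIBLE);
		if (!done)
			schedule();
		finish_swait(&wq, &wait);
	}

	/* Safe to call from hard-irq context. */
	static void waker(void)
	{
		done = true;
		if (swait_active(&wq))
			swake_up(&wq);
	}
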
  1569. diff -Nur linux-4.4.62.orig/arch/sh/kernel/irq.c linux-4.4.62/arch/sh/kernel/irq.c
  1570. --- linux-4.4.62.orig/arch/sh/kernel/irq.c 2017-04-18 07:15:37.000000000 +0200
  1571. +++ linux-4.4.62/arch/sh/kernel/irq.c 2017-04-18 17:38:08.030642885 +0200
  1572. @@ -147,6 +147,7 @@
  1573. hardirq_ctx[cpu] = NULL;
  1574. }
  1575. +#ifndef CONFIG_PREEMPT_RT_FULL
  1576. void do_softirq_own_stack(void)
  1577. {
  1578. struct thread_info *curctx;
  1579. @@ -174,6 +175,7 @@
  1580. "r5", "r6", "r7", "r8", "r9", "r15", "t", "pr"
  1581. );
  1582. }
  1583. +#endif
  1584. #else
  1585. static inline void handle_one_irq(unsigned int irq)
  1586. {
  1587. diff -Nur linux-4.4.62.orig/arch/sparc/Kconfig linux-4.4.62/arch/sparc/Kconfig
  1588. --- linux-4.4.62.orig/arch/sparc/Kconfig 2017-04-18 07:15:37.000000000 +0200
  1589. +++ linux-4.4.62/arch/sparc/Kconfig 2017-04-18 17:38:08.030642885 +0200
  1590. @@ -189,12 +189,10 @@
  1591. source kernel/Kconfig.hz
  1592. config RWSEM_GENERIC_SPINLOCK
  1593. - bool
  1594. - default y if SPARC32
  1595. + def_bool PREEMPT_RT_FULL
  1596. config RWSEM_XCHGADD_ALGORITHM
  1597. - bool
  1598. - default y if SPARC64
  1599. + def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL
  1600. config GENERIC_HWEIGHT
  1601. bool
  1602. diff -Nur linux-4.4.62.orig/arch/sparc/kernel/irq_64.c linux-4.4.62/arch/sparc/kernel/irq_64.c
  1603. --- linux-4.4.62.orig/arch/sparc/kernel/irq_64.c 2017-04-18 07:15:37.000000000 +0200
  1604. +++ linux-4.4.62/arch/sparc/kernel/irq_64.c 2017-04-18 17:38:08.030642885 +0200
  1605. @@ -854,6 +854,7 @@
  1606. set_irq_regs(old_regs);
  1607. }
  1608. +#ifndef CONFIG_PREEMPT_RT_FULL
  1609. void do_softirq_own_stack(void)
  1610. {
  1611. void *orig_sp, *sp = softirq_stack[smp_processor_id()];
  1612. @@ -868,6 +869,7 @@
  1613. __asm__ __volatile__("mov %0, %%sp"
  1614. : : "r" (orig_sp));
  1615. }
  1616. +#endif
  1617. #ifdef CONFIG_HOTPLUG_CPU
  1618. void fixup_irqs(void)
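
The #ifndef CONFIG_PREEMPT_RT_FULL guards added around do_softirq_own_stack() and call_do_softirq (powerpc, sh and sparc above, x86-64 further down) reflect that PREEMPT_RT_FULL processes softirqs in thread context, so the hard-irq exit path never switches to a dedicated softirq stack and the arch helpers are simply compiled out. The shape of the guard each of these hunks adds, sketched:

	#ifndef CONFIG_PREEMPT_RT_FULL
	void do_softirq_own_stack(void)
	{
		/* arch-specific: switch to the per-CPU softirq stack and
		 * run __do_softirq() there */
	}
	#endif
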
  1619. diff -Nur linux-4.4.62.orig/arch/x86/crypto/aesni-intel_glue.c linux-4.4.62/arch/x86/crypto/aesni-intel_glue.c
  1620. --- linux-4.4.62.orig/arch/x86/crypto/aesni-intel_glue.c 2017-04-18 07:15:37.000000000 +0200
  1621. +++ linux-4.4.62/arch/x86/crypto/aesni-intel_glue.c 2017-04-18 17:38:08.030642885 +0200
  1622. @@ -383,14 +383,14 @@
  1623. err = blkcipher_walk_virt(desc, &walk);
  1624. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  1625. - kernel_fpu_begin();
  1626. while ((nbytes = walk.nbytes)) {
  1627. + kernel_fpu_begin();
  1628. aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
  1629. - nbytes & AES_BLOCK_MASK);
  1630. + nbytes & AES_BLOCK_MASK);
  1631. + kernel_fpu_end();
  1632. nbytes &= AES_BLOCK_SIZE - 1;
  1633. err = blkcipher_walk_done(desc, &walk, nbytes);
  1634. }
  1635. - kernel_fpu_end();
  1636. return err;
  1637. }
  1638. @@ -407,14 +407,14 @@
  1639. err = blkcipher_walk_virt(desc, &walk);
  1640. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  1641. - kernel_fpu_begin();
  1642. while ((nbytes = walk.nbytes)) {
  1643. + kernel_fpu_begin();
  1644. aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
  1645. nbytes & AES_BLOCK_MASK);
  1646. + kernel_fpu_end();
  1647. nbytes &= AES_BLOCK_SIZE - 1;
  1648. err = blkcipher_walk_done(desc, &walk, nbytes);
  1649. }
  1650. - kernel_fpu_end();
  1651. return err;
  1652. }
  1653. @@ -431,14 +431,14 @@
  1654. err = blkcipher_walk_virt(desc, &walk);
  1655. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  1656. - kernel_fpu_begin();
  1657. while ((nbytes = walk.nbytes)) {
  1658. + kernel_fpu_begin();
  1659. aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
  1660. nbytes & AES_BLOCK_MASK, walk.iv);
  1661. + kernel_fpu_end();
  1662. nbytes &= AES_BLOCK_SIZE - 1;
  1663. err = blkcipher_walk_done(desc, &walk, nbytes);
  1664. }
  1665. - kernel_fpu_end();
  1666. return err;
  1667. }
  1668. @@ -455,14 +455,14 @@
  1669. err = blkcipher_walk_virt(desc, &walk);
  1670. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  1671. - kernel_fpu_begin();
  1672. while ((nbytes = walk.nbytes)) {
  1673. + kernel_fpu_begin();
  1674. aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
  1675. nbytes & AES_BLOCK_MASK, walk.iv);
  1676. + kernel_fpu_end();
  1677. nbytes &= AES_BLOCK_SIZE - 1;
  1678. err = blkcipher_walk_done(desc, &walk, nbytes);
  1679. }
  1680. - kernel_fpu_end();
  1681. return err;
  1682. }
  1683. @@ -514,18 +514,20 @@
  1684. err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
  1685. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  1686. - kernel_fpu_begin();
  1687. while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
  1688. + kernel_fpu_begin();
  1689. aesni_ctr_enc_tfm(ctx, walk.dst.virt.addr, walk.src.virt.addr,
  1690. nbytes & AES_BLOCK_MASK, walk.iv);
  1691. + kernel_fpu_end();
  1692. nbytes &= AES_BLOCK_SIZE - 1;
  1693. err = blkcipher_walk_done(desc, &walk, nbytes);
  1694. }
  1695. if (walk.nbytes) {
  1696. + kernel_fpu_begin();
  1697. ctr_crypt_final(ctx, &walk);
  1698. + kernel_fpu_end();
  1699. err = blkcipher_walk_done(desc, &walk, 0);
  1700. }
  1701. - kernel_fpu_end();
  1702. return err;
  1703. }
  1704. diff -Nur linux-4.4.62.orig/arch/x86/crypto/cast5_avx_glue.c linux-4.4.62/arch/x86/crypto/cast5_avx_glue.c
  1705. --- linux-4.4.62.orig/arch/x86/crypto/cast5_avx_glue.c 2017-04-18 07:15:37.000000000 +0200
  1706. +++ linux-4.4.62/arch/x86/crypto/cast5_avx_glue.c 2017-04-18 17:38:08.030642885 +0200
  1707. @@ -59,7 +59,7 @@
  1708. static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
  1709. bool enc)
  1710. {
  1711. - bool fpu_enabled = false;
  1712. + bool fpu_enabled;
  1713. struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
  1714. const unsigned int bsize = CAST5_BLOCK_SIZE;
  1715. unsigned int nbytes;
  1716. @@ -75,7 +75,7 @@
  1717. u8 *wsrc = walk->src.virt.addr;
  1718. u8 *wdst = walk->dst.virt.addr;
  1719. - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
  1720. + fpu_enabled = cast5_fpu_begin(false, nbytes);
  1721. /* Process multi-block batch */
  1722. if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
  1723. @@ -103,10 +103,9 @@
  1724. } while (nbytes >= bsize);
  1725. done:
  1726. + cast5_fpu_end(fpu_enabled);
  1727. err = blkcipher_walk_done(desc, walk, nbytes);
  1728. }
  1729. -
  1730. - cast5_fpu_end(fpu_enabled);
  1731. return err;
  1732. }
  1733. @@ -227,7 +226,7 @@
  1734. static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
  1735. struct scatterlist *src, unsigned int nbytes)
  1736. {
  1737. - bool fpu_enabled = false;
  1738. + bool fpu_enabled;
  1739. struct blkcipher_walk walk;
  1740. int err;
  1741. @@ -236,12 +235,11 @@
  1742. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  1743. while ((nbytes = walk.nbytes)) {
  1744. - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
  1745. + fpu_enabled = cast5_fpu_begin(false, nbytes);
  1746. nbytes = __cbc_decrypt(desc, &walk);
  1747. + cast5_fpu_end(fpu_enabled);
  1748. err = blkcipher_walk_done(desc, &walk, nbytes);
  1749. }
  1750. -
  1751. - cast5_fpu_end(fpu_enabled);
  1752. return err;
  1753. }
  1754. @@ -311,7 +309,7 @@
  1755. static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
  1756. struct scatterlist *src, unsigned int nbytes)
  1757. {
  1758. - bool fpu_enabled = false;
  1759. + bool fpu_enabled;
  1760. struct blkcipher_walk walk;
  1761. int err;
  1762. @@ -320,13 +318,12 @@
  1763. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  1764. while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
  1765. - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
  1766. + fpu_enabled = cast5_fpu_begin(false, nbytes);
  1767. nbytes = __ctr_crypt(desc, &walk);
  1768. + cast5_fpu_end(fpu_enabled);
  1769. err = blkcipher_walk_done(desc, &walk, nbytes);
  1770. }
  1771. - cast5_fpu_end(fpu_enabled);
  1772. -
  1773. if (walk.nbytes) {
  1774. ctr_crypt_final(desc, &walk);
  1775. err = blkcipher_walk_done(desc, &walk, 0);
  1776. diff -Nur linux-4.4.62.orig/arch/x86/crypto/glue_helper.c linux-4.4.62/arch/x86/crypto/glue_helper.c
  1777. --- linux-4.4.62.orig/arch/x86/crypto/glue_helper.c 2017-04-18 07:15:37.000000000 +0200
  1778. +++ linux-4.4.62/arch/x86/crypto/glue_helper.c 2017-04-18 17:38:08.030642885 +0200
  1779. @@ -39,7 +39,7 @@
  1780. void *ctx = crypto_blkcipher_ctx(desc->tfm);
  1781. const unsigned int bsize = 128 / 8;
  1782. unsigned int nbytes, i, func_bytes;
  1783. - bool fpu_enabled = false;
  1784. + bool fpu_enabled;
  1785. int err;
  1786. err = blkcipher_walk_virt(desc, walk);
  1787. @@ -49,7 +49,7 @@
  1788. u8 *wdst = walk->dst.virt.addr;
  1789. fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
  1790. - desc, fpu_enabled, nbytes);
  1791. + desc, false, nbytes);
  1792. for (i = 0; i < gctx->num_funcs; i++) {
  1793. func_bytes = bsize * gctx->funcs[i].num_blocks;
  1794. @@ -71,10 +71,10 @@
  1795. }
  1796. done:
  1797. + glue_fpu_end(fpu_enabled);
  1798. err = blkcipher_walk_done(desc, walk, nbytes);
  1799. }
  1800. - glue_fpu_end(fpu_enabled);
  1801. return err;
  1802. }
  1803. @@ -194,7 +194,7 @@
  1804. struct scatterlist *src, unsigned int nbytes)
  1805. {
  1806. const unsigned int bsize = 128 / 8;
  1807. - bool fpu_enabled = false;
  1808. + bool fpu_enabled;
  1809. struct blkcipher_walk walk;
  1810. int err;
  1811. @@ -203,12 +203,12 @@
  1812. while ((nbytes = walk.nbytes)) {
  1813. fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
  1814. - desc, fpu_enabled, nbytes);
  1815. + desc, false, nbytes);
  1816. nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk);
  1817. + glue_fpu_end(fpu_enabled);
  1818. err = blkcipher_walk_done(desc, &walk, nbytes);
  1819. }
  1820. - glue_fpu_end(fpu_enabled);
  1821. return err;
  1822. }
  1823. EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit);
  1824. @@ -277,7 +277,7 @@
  1825. struct scatterlist *src, unsigned int nbytes)
  1826. {
  1827. const unsigned int bsize = 128 / 8;
  1828. - bool fpu_enabled = false;
  1829. + bool fpu_enabled;
  1830. struct blkcipher_walk walk;
  1831. int err;
  1832. @@ -286,13 +286,12 @@
  1833. while ((nbytes = walk.nbytes) >= bsize) {
  1834. fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
  1835. - desc, fpu_enabled, nbytes);
  1836. + desc, false, nbytes);
  1837. nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk);
  1838. + glue_fpu_end(fpu_enabled);
  1839. err = blkcipher_walk_done(desc, &walk, nbytes);
  1840. }
  1841. - glue_fpu_end(fpu_enabled);
  1842. -
  1843. if (walk.nbytes) {
  1844. glue_ctr_crypt_final_128bit(
  1845. gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk);
  1846. @@ -347,7 +346,7 @@
  1847. void *tweak_ctx, void *crypt_ctx)
  1848. {
  1849. const unsigned int bsize = 128 / 8;
  1850. - bool fpu_enabled = false;
  1851. + bool fpu_enabled;
  1852. struct blkcipher_walk walk;
  1853. int err;
  1854. @@ -360,21 +359,21 @@
  1855. /* set minimum length to bsize, for tweak_fn */
  1856. fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
  1857. - desc, fpu_enabled,
  1858. + desc, false,
  1859. nbytes < bsize ? bsize : nbytes);
  1860. -
  1861. /* calculate first value of T */
  1862. tweak_fn(tweak_ctx, walk.iv, walk.iv);
  1863. + glue_fpu_end(fpu_enabled);
  1864. while (nbytes) {
  1865. + fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
  1866. + desc, false, nbytes);
  1867. nbytes = __glue_xts_crypt_128bit(gctx, crypt_ctx, desc, &walk);
  1868. + glue_fpu_end(fpu_enabled);
  1869. err = blkcipher_walk_done(desc, &walk, nbytes);
  1870. nbytes = walk.nbytes;
  1871. }
  1872. -
  1873. - glue_fpu_end(fpu_enabled);
  1874. -
  1875. return err;
  1876. }
  1877. EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit);
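
kernel_fpu_begin()/glue_fpu_begin() keep preemption disabled for as long as kernel code owns the FPU. The aesni, cast5 and glue_helper hunks above therefore stop bracketing the whole blkcipher walk with a single begin/end pair and instead take and drop the FPU around each chunk, so the non-preemptible region is bounded by one walk step rather than by the total request size, which matters for latencies on PREEMPT_RT. The resulting loop shape, sketched as a fragment (process_one_chunk() stands in for the per-algorithm SIMD helper such as aesni_ecb_enc()):

	while ((nbytes = walk.nbytes)) {
		kernel_fpu_begin();		/* preemption off only for this chunk */
		process_one_chunk(ctx, &walk, nbytes & AES_BLOCK_MASK);
		kernel_fpu_end();

		nbytes &= AES_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

For the same reason the cast5 and glue helpers now call their *_fpu_begin(false, nbytes) per walk step and end the FPU section before blkcipher_walk_done().
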
  1878. diff -Nur linux-4.4.62.orig/arch/x86/entry/common.c linux-4.4.62/arch/x86/entry/common.c
  1879. --- linux-4.4.62.orig/arch/x86/entry/common.c 2017-04-18 07:15:37.000000000 +0200
  1880. +++ linux-4.4.62/arch/x86/entry/common.c 2017-04-18 17:38:08.030642885 +0200
  1881. @@ -220,7 +220,7 @@
  1882. #define EXIT_TO_USERMODE_LOOP_FLAGS \
  1883. (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
  1884. - _TIF_NEED_RESCHED | _TIF_USER_RETURN_NOTIFY)
  1885. + _TIF_NEED_RESCHED_MASK | _TIF_USER_RETURN_NOTIFY)
  1886. static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
  1887. {
  1888. @@ -236,9 +236,16 @@
  1889. /* We have work to do. */
  1890. local_irq_enable();
  1891. - if (cached_flags & _TIF_NEED_RESCHED)
  1892. + if (cached_flags & _TIF_NEED_RESCHED_MASK)
  1893. schedule();
  1894. +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
  1895. + if (unlikely(current->forced_info.si_signo)) {
  1896. + struct task_struct *t = current;
  1897. + force_sig_info(t->forced_info.si_signo, &t->forced_info, t);
  1898. + t->forced_info.si_signo = 0;
  1899. + }
  1900. +#endif
  1901. if (cached_flags & _TIF_UPROBE)
  1902. uprobe_notify_resume(regs);
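
In the entry/common.c hunk above, _TIF_NEED_RESCHED_MASK covers both resched flags, and the ARCH_RT_DELAYS_SIGNAL_SEND block delivers a signal that force_sig_info() had to postpone: a trap running on an IST stack cannot take the sleeping sighand lock on RT, so the signal is parked in current->forced_info and sent here on the way back to user mode (see the asm/signal.h hunk further down for the rationale). A sketch of the producer side; force_sig_info() is not part of this excerpt and the in_atomic() condition shown is an assumption:

	/* Inside force_sig_info(), roughly (the exact condition in the RT
	 * patch may differ): */
	#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
		if (in_atomic()) {
			/* Cannot take the sleeping siglock here; let the
			 * exit-to-user path deliver the signal instead. */
			t->forced_info = *info;
			set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
			return 0;
		}
	#endif
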
  1903. diff -Nur linux-4.4.62.orig/arch/x86/entry/entry_32.S linux-4.4.62/arch/x86/entry/entry_32.S
  1904. --- linux-4.4.62.orig/arch/x86/entry/entry_32.S 2017-04-18 07:15:37.000000000 +0200
  1905. +++ linux-4.4.62/arch/x86/entry/entry_32.S 2017-04-18 17:38:08.030642885 +0200
  1906. @@ -278,8 +278,24 @@
  1907. ENTRY(resume_kernel)
  1908. DISABLE_INTERRUPTS(CLBR_ANY)
  1909. need_resched:
  1910. + # preempt count == 0 + NEED_RS set?
  1911. cmpl $0, PER_CPU_VAR(__preempt_count)
  1912. +#ifndef CONFIG_PREEMPT_LAZY
  1913. jnz restore_all
  1914. +#else
  1915. + jz test_int_off
  1916. +
  1917. + # at least preempt count == 0 ?
  1918. + cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count)
  1919. + jne restore_all
  1920. +
  1921. + cmpl $0,TI_preempt_lazy_count(%ebp) # non-zero preempt_lazy_count ?
  1922. + jnz restore_all
  1923. +
  1924. + testl $_TIF_NEED_RESCHED_LAZY, TI_flags(%ebp)
  1925. + jz restore_all
  1926. +test_int_off:
  1927. +#endif
  1928. testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ?
  1929. jz restore_all
  1930. call preempt_schedule_irq
  1931. diff -Nur linux-4.4.62.orig/arch/x86/entry/entry_64.S linux-4.4.62/arch/x86/entry/entry_64.S
  1932. --- linux-4.4.62.orig/arch/x86/entry/entry_64.S 2017-04-18 07:15:37.000000000 +0200
  1933. +++ linux-4.4.62/arch/x86/entry/entry_64.S 2017-04-18 17:38:08.034643040 +0200
  1934. @@ -579,7 +579,23 @@
  1935. bt $9, EFLAGS(%rsp) /* were interrupts off? */
  1936. jnc 1f
  1937. 0: cmpl $0, PER_CPU_VAR(__preempt_count)
  1938. +#ifndef CONFIG_PREEMPT_LAZY
  1939. jnz 1f
  1940. +#else
  1941. + jz do_preempt_schedule_irq
  1942. +
  1943. + # at least preempt count == 0 ?
  1944. + cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count)
  1945. + jnz 1f
  1946. +
  1947. + GET_THREAD_INFO(%rcx)
  1948. + cmpl $0, TI_preempt_lazy_count(%rcx)
  1949. + jnz 1f
  1950. +
  1951. + bt $TIF_NEED_RESCHED_LAZY,TI_flags(%rcx)
  1952. + jnc 1f
  1953. +do_preempt_schedule_irq:
  1954. +#endif
  1955. call preempt_schedule_irq
  1956. jmp 0b
  1957. 1:
  1958. @@ -867,6 +883,7 @@
  1959. jmp 2b
  1960. .previous
  1961. +#ifndef CONFIG_PREEMPT_RT_FULL
  1962. /* Call softirq on interrupt stack. Interrupts are off. */
  1963. ENTRY(do_softirq_own_stack)
  1964. pushq %rbp
  1965. @@ -879,6 +896,7 @@
  1966. decl PER_CPU_VAR(irq_count)
  1967. ret
  1968. END(do_softirq_own_stack)
  1969. +#endif
  1970. #ifdef CONFIG_XEN
  1971. idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0
  1972. diff -Nur linux-4.4.62.orig/arch/x86/include/asm/preempt.h linux-4.4.62/arch/x86/include/asm/preempt.h
  1973. --- linux-4.4.62.orig/arch/x86/include/asm/preempt.h 2017-04-18 07:15:37.000000000 +0200
  1974. +++ linux-4.4.62/arch/x86/include/asm/preempt.h 2017-04-18 17:38:08.034643040 +0200
  1975. @@ -79,17 +79,46 @@
  1976. * a decrement which hits zero means we have no preempt_count and should
  1977. * reschedule.
  1978. */
  1979. -static __always_inline bool __preempt_count_dec_and_test(void)
  1980. +static __always_inline bool ____preempt_count_dec_and_test(void)
  1981. {
  1982. GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), "e");
  1983. }
  1984. +static __always_inline bool __preempt_count_dec_and_test(void)
  1985. +{
  1986. + if (____preempt_count_dec_and_test())
  1987. + return true;
  1988. +#ifdef CONFIG_PREEMPT_LAZY
  1989. + if (current_thread_info()->preempt_lazy_count)
  1990. + return false;
  1991. + return test_thread_flag(TIF_NEED_RESCHED_LAZY);
  1992. +#else
  1993. + return false;
  1994. +#endif
  1995. +}
  1996. +
  1997. /*
  1998. * Returns true when we need to resched and can (barring IRQ state).
  1999. */
  2000. static __always_inline bool should_resched(int preempt_offset)
  2001. {
  2002. +#ifdef CONFIG_PREEMPT_LAZY
  2003. + u32 tmp;
  2004. +
  2005. + tmp = raw_cpu_read_4(__preempt_count);
  2006. + if (tmp == preempt_offset)
  2007. + return true;
  2008. +
  2009. + /* preempt count == 0 ? */
  2010. + tmp &= ~PREEMPT_NEED_RESCHED;
  2011. + if (tmp)
  2012. + return false;
  2013. + if (current_thread_info()->preempt_lazy_count)
  2014. + return false;
  2015. + return test_thread_flag(TIF_NEED_RESCHED_LAZY);
  2016. +#else
  2017. return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset);
  2018. +#endif
  2019. }
  2020. #ifdef CONFIG_PREEMPT
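
A note on the two hunks above: with CONFIG_PREEMPT_LAZY, dropping the preempt count to zero only forces an immediate reschedule for a hard _TIF_NEED_RESCHED; a lazy request (_TIF_NEED_RESCHED_LAZY) is honoured only once preempt_lazy_count is also zero. A minimal user-space model of that decision, for illustration only (the real code folds NEED_RESCHED into the per-CPU count; here the flags are kept separate for readability):

	#include <stdbool.h>
	#include <stdio.h>

	/* Stand-ins for the per-CPU preempt count, the per-thread lazy
	 * counter and the two resched flags used by the patch. */
	static unsigned int preempt_count;
	static unsigned int preempt_lazy_count;
	static bool tif_need_resched;       /* _TIF_NEED_RESCHED      */
	static bool tif_need_resched_lazy;  /* _TIF_NEED_RESCHED_LAZY */

	static bool model_should_resched(void)
	{
		if (preempt_count)
			return false;             /* preemption disabled      */
		if (tif_need_resched)
			return true;              /* hard request, as before  */
		if (preempt_lazy_count)
			return false;             /* lazy preemption held off */
		return tif_need_resched_lazy;     /* honour the lazy request  */
	}

	int main(void)
	{
		tif_need_resched_lazy = true;
		preempt_lazy_count = 1;
		printf("in lazy section:   %d\n", model_should_resched()); /* 0 */
		preempt_lazy_count = 0;
		printf("lazy section left: %d\n", model_should_resched()); /* 1 */
		return 0;
	}
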
  2021. diff -Nur linux-4.4.62.orig/arch/x86/include/asm/signal.h linux-4.4.62/arch/x86/include/asm/signal.h
  2022. --- linux-4.4.62.orig/arch/x86/include/asm/signal.h 2017-04-18 07:15:37.000000000 +0200
  2023. +++ linux-4.4.62/arch/x86/include/asm/signal.h 2017-04-18 17:38:08.034643040 +0200
  2024. @@ -23,6 +23,19 @@
  2025. unsigned long sig[_NSIG_WORDS];
  2026. } sigset_t;
  2027. +/*
  2028. + * Because some traps use the IST stack, we must keep preemption
  2029. + * disabled while calling do_trap(), but do_trap() may call
  2030. + * force_sig_info() which will grab the signal spin_locks for the
  2031. + * task, which in PREEMPT_RT_FULL are mutexes. By defining
  2032. + * ARCH_RT_DELAYS_SIGNAL_SEND the force_sig_info() will set
  2033. + * TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the
  2034. + * trap.
  2035. + */
  2036. +#if defined(CONFIG_PREEMPT_RT_FULL)
  2037. +#define ARCH_RT_DELAYS_SIGNAL_SEND
  2038. +#endif
  2039. +
  2040. #ifndef CONFIG_COMPAT
  2041. typedef sigset_t compat_sigset_t;
  2042. #endif
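
The consumer of ARCH_RT_DELAYS_SIGNAL_SEND is the exit_to_usermode_loop() hunk earlier in this patch; the sending side is changed in kernel/signal.c later in the series. Sketched from the comment above, simplified and for illustration only:

	/* inside force_sig_info(): if the caller is atomic and the target
	 * is the current task, park the siginfo on the task instead of
	 * taking the (sleeping, on RT) sighand locks */
	#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
		if (in_atomic()) {
			if (WARN_ON_ONCE(t != current))
				return 0;
			t->forced_info = *info;
			set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
			return 0;	/* delivered on return to user space */
		}
	#endif
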
  2043. diff -Nur linux-4.4.62.orig/arch/x86/include/asm/stackprotector.h linux-4.4.62/arch/x86/include/asm/stackprotector.h
  2044. --- linux-4.4.62.orig/arch/x86/include/asm/stackprotector.h 2017-04-18 07:15:37.000000000 +0200
  2045. +++ linux-4.4.62/arch/x86/include/asm/stackprotector.h 2017-04-18 17:38:08.034643040 +0200
  2046. @@ -59,7 +59,7 @@
  2047. */
  2048. static __always_inline void boot_init_stack_canary(void)
  2049. {
  2050. - u64 canary;
  2051. + u64 uninitialized_var(canary);
  2052. u64 tsc;
  2053. #ifdef CONFIG_X86_64
  2054. @@ -70,8 +70,15 @@
  2055. * of randomness. The TSC only matters for very early init,
  2056. * there it already has some randomness on most systems. Later
  2057. * on during the bootup the random pool has true entropy too.
  2058. + *
  2059. + * For preempt-rt we need to weaken the randomness a bit, as
  2060. + * we can't call into the random generator from atomic context
2061. + * due to locking constraints. We just leave the canary
2062. + * uninitialized and use the TSC-based randomness on top of it.
  2063. */
  2064. +#ifndef CONFIG_PREEMPT_RT_FULL
  2065. get_random_bytes(&canary, sizeof(canary));
  2066. +#endif
  2067. tsc = rdtsc();
  2068. canary += tsc + (tsc << 32UL);
  2069. diff -Nur linux-4.4.62.orig/arch/x86/include/asm/thread_info.h linux-4.4.62/arch/x86/include/asm/thread_info.h
  2070. --- linux-4.4.62.orig/arch/x86/include/asm/thread_info.h 2017-04-18 07:15:37.000000000 +0200
  2071. +++ linux-4.4.62/arch/x86/include/asm/thread_info.h 2017-04-18 17:38:08.034643040 +0200
  2072. @@ -58,6 +58,8 @@
  2073. __u32 status; /* thread synchronous flags */
  2074. __u32 cpu; /* current CPU */
  2075. mm_segment_t addr_limit;
  2076. + int preempt_lazy_count; /* 0 => lazy preemptable
  2077. + <0 => BUG */
  2078. unsigned int sig_on_uaccess_error:1;
  2079. unsigned int uaccess_err:1; /* uaccess failed */
  2080. };
  2081. @@ -95,6 +97,7 @@
  2082. #define TIF_SYSCALL_EMU 6 /* syscall emulation active */
  2083. #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
  2084. #define TIF_SECCOMP 8 /* secure computing */
  2085. +#define TIF_NEED_RESCHED_LAZY 9 /* lazy rescheduling necessary */
  2086. #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
  2087. #define TIF_UPROBE 12 /* breakpointed or singlestepping */
  2088. #define TIF_NOTSC 16 /* TSC is not accessible in userland */
  2089. @@ -119,6 +122,7 @@
  2090. #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
  2091. #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
  2092. #define _TIF_SECCOMP (1 << TIF_SECCOMP)
  2093. +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
  2094. #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
  2095. #define _TIF_UPROBE (1 << TIF_UPROBE)
  2096. #define _TIF_NOTSC (1 << TIF_NOTSC)
  2097. @@ -152,6 +156,8 @@
  2098. #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
  2099. #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
  2100. +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
  2101. +
  2102. #define STACK_WARN (THREAD_SIZE/8)
  2103. /*
  2104. diff -Nur linux-4.4.62.orig/arch/x86/include/asm/uv/uv_bau.h linux-4.4.62/arch/x86/include/asm/uv/uv_bau.h
  2105. --- linux-4.4.62.orig/arch/x86/include/asm/uv/uv_bau.h 2017-04-18 07:15:37.000000000 +0200
  2106. +++ linux-4.4.62/arch/x86/include/asm/uv/uv_bau.h 2017-04-18 17:38:08.034643040 +0200
  2107. @@ -615,9 +615,9 @@
  2108. cycles_t send_message;
  2109. cycles_t period_end;
  2110. cycles_t period_time;
  2111. - spinlock_t uvhub_lock;
  2112. - spinlock_t queue_lock;
  2113. - spinlock_t disable_lock;
  2114. + raw_spinlock_t uvhub_lock;
  2115. + raw_spinlock_t queue_lock;
  2116. + raw_spinlock_t disable_lock;
  2117. /* tunables */
  2118. int max_concurr;
  2119. int max_concurr_const;
  2120. @@ -776,15 +776,15 @@
  2121. * to be lowered below the current 'v'. atomic_add_unless can only stop
  2122. * on equal.
  2123. */
  2124. -static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u)
  2125. +static inline int atomic_inc_unless_ge(raw_spinlock_t *lock, atomic_t *v, int u)
  2126. {
  2127. - spin_lock(lock);
  2128. + raw_spin_lock(lock);
  2129. if (atomic_read(v) >= u) {
  2130. - spin_unlock(lock);
  2131. + raw_spin_unlock(lock);
  2132. return 0;
  2133. }
  2134. atomic_inc(v);
  2135. - spin_unlock(lock);
  2136. + raw_spin_unlock(lock);
  2137. return 1;
  2138. }
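
The spinlock_t -> raw_spinlock_t switches in this header (and in several files below) follow the usual RT rule: spinlock_t becomes a sleeping rtmutex under PREEMPT_RT_FULL, so a lock taken where sleeping is not allowed has to stay a raw, truly spinning lock. The pattern in isolation, as a generic sketch rather than the BAU code itself:

	#include <linux/spinlock.h>

	static DEFINE_RAW_SPINLOCK(example_lock);	/* spins even on RT */

	static void example_atomic_path(void)
	{
		unsigned long flags;

		/* safe from a must-not-sleep context on RT; a plain
		 * spinlock_t here would become a sleeping lock */
		raw_spin_lock_irqsave(&example_lock, flags);
		/* ... touch the shared state ... */
		raw_spin_unlock_irqrestore(&example_lock, flags);
	}
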
  2139. diff -Nur linux-4.4.62.orig/arch/x86/include/asm/uv/uv_hub.h linux-4.4.62/arch/x86/include/asm/uv/uv_hub.h
  2140. --- linux-4.4.62.orig/arch/x86/include/asm/uv/uv_hub.h 2017-04-18 07:15:37.000000000 +0200
  2141. +++ linux-4.4.62/arch/x86/include/asm/uv/uv_hub.h 2017-04-18 17:38:08.034643040 +0200
  2142. @@ -492,7 +492,7 @@
  2143. unsigned short nr_online_cpus;
  2144. unsigned short pnode;
  2145. short memory_nid;
  2146. - spinlock_t nmi_lock; /* obsolete, see uv_hub_nmi */
  2147. + raw_spinlock_t nmi_lock; /* obsolete, see uv_hub_nmi */
  2148. unsigned long nmi_count; /* obsolete, see uv_hub_nmi */
  2149. };
  2150. extern struct uv_blade_info *uv_blade_info;
  2151. diff -Nur linux-4.4.62.orig/arch/x86/Kconfig linux-4.4.62/arch/x86/Kconfig
  2152. --- linux-4.4.62.orig/arch/x86/Kconfig 2017-04-18 07:15:37.000000000 +0200
  2153. +++ linux-4.4.62/arch/x86/Kconfig 2017-04-18 17:38:08.030642885 +0200
  2154. @@ -17,6 +17,7 @@
  2155. ### Arch settings
  2156. config X86
  2157. def_bool y
  2158. + select HAVE_PREEMPT_LAZY
  2159. select ACPI_LEGACY_TABLES_LOOKUP if ACPI
  2160. select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
  2161. select ANON_INODES
  2162. @@ -212,8 +213,11 @@
  2163. def_bool y
  2164. depends on ISA_DMA_API
  2165. +config RWSEM_GENERIC_SPINLOCK
  2166. + def_bool PREEMPT_RT_FULL
  2167. +
  2168. config RWSEM_XCHGADD_ALGORITHM
  2169. - def_bool y
  2170. + def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL
  2171. config GENERIC_CALIBRATE_DELAY
  2172. def_bool y
  2173. @@ -848,7 +852,7 @@
  2174. config MAXSMP
  2175. bool "Enable Maximum number of SMP Processors and NUMA Nodes"
  2176. depends on X86_64 && SMP && DEBUG_KERNEL
  2177. - select CPUMASK_OFFSTACK
  2178. + select CPUMASK_OFFSTACK if !PREEMPT_RT_FULL
  2179. ---help---
  2180. Enable maximum number of CPUS and NUMA Nodes for this architecture.
  2181. If unsure, say N.
  2182. diff -Nur linux-4.4.62.orig/arch/x86/kernel/acpi/boot.c linux-4.4.62/arch/x86/kernel/acpi/boot.c
  2183. --- linux-4.4.62.orig/arch/x86/kernel/acpi/boot.c 2017-04-18 07:15:37.000000000 +0200
  2184. +++ linux-4.4.62/arch/x86/kernel/acpi/boot.c 2017-04-18 17:38:08.034643040 +0200
  2185. @@ -87,7 +87,9 @@
  2186. * ->ioapic_mutex
  2187. * ->ioapic_lock
  2188. */
  2189. +#ifdef CONFIG_X86_IO_APIC
  2190. static DEFINE_MUTEX(acpi_ioapic_lock);
  2191. +#endif
  2192. /* --------------------------------------------------------------------------
  2193. Boot-time Configuration
  2194. diff -Nur linux-4.4.62.orig/arch/x86/kernel/apic/io_apic.c linux-4.4.62/arch/x86/kernel/apic/io_apic.c
  2195. --- linux-4.4.62.orig/arch/x86/kernel/apic/io_apic.c 2017-04-18 07:15:37.000000000 +0200
  2196. +++ linux-4.4.62/arch/x86/kernel/apic/io_apic.c 2017-04-18 17:38:08.034643040 +0200
  2197. @@ -1711,7 +1711,8 @@
  2198. static inline bool ioapic_irqd_mask(struct irq_data *data)
  2199. {
  2200. /* If we are moving the irq we need to mask it */
  2201. - if (unlikely(irqd_is_setaffinity_pending(data))) {
  2202. + if (unlikely(irqd_is_setaffinity_pending(data) &&
  2203. + !irqd_irq_inprogress(data))) {
  2204. mask_ioapic_irq(data);
  2205. return true;
  2206. }
  2207. diff -Nur linux-4.4.62.orig/arch/x86/kernel/apic/x2apic_uv_x.c linux-4.4.62/arch/x86/kernel/apic/x2apic_uv_x.c
  2208. --- linux-4.4.62.orig/arch/x86/kernel/apic/x2apic_uv_x.c 2017-04-18 07:15:37.000000000 +0200
  2209. +++ linux-4.4.62/arch/x86/kernel/apic/x2apic_uv_x.c 2017-04-18 17:38:08.034643040 +0200
  2210. @@ -947,7 +947,7 @@
  2211. uv_blade_info[blade].pnode = pnode;
  2212. uv_blade_info[blade].nr_possible_cpus = 0;
  2213. uv_blade_info[blade].nr_online_cpus = 0;
  2214. - spin_lock_init(&uv_blade_info[blade].nmi_lock);
  2215. + raw_spin_lock_init(&uv_blade_info[blade].nmi_lock);
  2216. min_pnode = min(pnode, min_pnode);
  2217. max_pnode = max(pnode, max_pnode);
  2218. blade++;
  2219. diff -Nur linux-4.4.62.orig/arch/x86/kernel/asm-offsets.c linux-4.4.62/arch/x86/kernel/asm-offsets.c
  2220. --- linux-4.4.62.orig/arch/x86/kernel/asm-offsets.c 2017-04-18 07:15:37.000000000 +0200
  2221. +++ linux-4.4.62/arch/x86/kernel/asm-offsets.c 2017-04-18 17:38:08.034643040 +0200
  2222. @@ -32,6 +32,7 @@
  2223. OFFSET(TI_flags, thread_info, flags);
  2224. OFFSET(TI_status, thread_info, status);
  2225. OFFSET(TI_addr_limit, thread_info, addr_limit);
  2226. + OFFSET(TI_preempt_lazy_count, thread_info, preempt_lazy_count);
  2227. BLANK();
  2228. OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
  2229. @@ -89,4 +90,5 @@
  2230. BLANK();
  2231. DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
  2232. + DEFINE(_PREEMPT_ENABLED, PREEMPT_ENABLED);
  2233. }
  2234. diff -Nur linux-4.4.62.orig/arch/x86/kernel/cpu/mcheck/mce.c linux-4.4.62/arch/x86/kernel/cpu/mcheck/mce.c
  2235. --- linux-4.4.62.orig/arch/x86/kernel/cpu/mcheck/mce.c 2017-04-18 07:15:37.000000000 +0200
  2236. +++ linux-4.4.62/arch/x86/kernel/cpu/mcheck/mce.c 2017-04-18 17:38:08.034643040 +0200
  2237. @@ -41,6 +41,8 @@
  2238. #include <linux/debugfs.h>
  2239. #include <linux/irq_work.h>
  2240. #include <linux/export.h>
  2241. +#include <linux/jiffies.h>
  2242. +#include <linux/swork.h>
  2243. #include <asm/processor.h>
  2244. #include <asm/traps.h>
  2245. @@ -1236,7 +1238,7 @@
  2246. static unsigned long check_interval = INITIAL_CHECK_INTERVAL;
  2247. static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
  2248. -static DEFINE_PER_CPU(struct timer_list, mce_timer);
  2249. +static DEFINE_PER_CPU(struct hrtimer, mce_timer);
  2250. static unsigned long mce_adjust_timer_default(unsigned long interval)
  2251. {
  2252. @@ -1245,32 +1247,18 @@
  2253. static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default;
  2254. -static void __restart_timer(struct timer_list *t, unsigned long interval)
  2255. +static enum hrtimer_restart __restart_timer(struct hrtimer *timer, unsigned long interval)
  2256. {
  2257. - unsigned long when = jiffies + interval;
  2258. - unsigned long flags;
  2259. -
  2260. - local_irq_save(flags);
  2261. -
  2262. - if (timer_pending(t)) {
  2263. - if (time_before(when, t->expires))
  2264. - mod_timer_pinned(t, when);
  2265. - } else {
  2266. - t->expires = round_jiffies(when);
  2267. - add_timer_on(t, smp_processor_id());
  2268. - }
  2269. -
  2270. - local_irq_restore(flags);
  2271. + if (!interval)
  2272. + return HRTIMER_NORESTART;
  2273. + hrtimer_forward_now(timer, ns_to_ktime(jiffies_to_nsecs(interval)));
  2274. + return HRTIMER_RESTART;
  2275. }
  2276. -static void mce_timer_fn(unsigned long data)
  2277. +static enum hrtimer_restart mce_timer_fn(struct hrtimer *timer)
  2278. {
  2279. - struct timer_list *t = this_cpu_ptr(&mce_timer);
  2280. - int cpu = smp_processor_id();
  2281. unsigned long iv;
  2282. - WARN_ON(cpu != data);
  2283. -
  2284. iv = __this_cpu_read(mce_next_interval);
  2285. if (mce_available(this_cpu_ptr(&cpu_info))) {
  2286. @@ -1293,7 +1281,7 @@
  2287. done:
  2288. __this_cpu_write(mce_next_interval, iv);
  2289. - __restart_timer(t, iv);
  2290. + return __restart_timer(timer, iv);
  2291. }
  2292. /*
  2293. @@ -1301,7 +1289,7 @@
  2294. */
  2295. void mce_timer_kick(unsigned long interval)
  2296. {
  2297. - struct timer_list *t = this_cpu_ptr(&mce_timer);
  2298. + struct hrtimer *t = this_cpu_ptr(&mce_timer);
  2299. unsigned long iv = __this_cpu_read(mce_next_interval);
  2300. __restart_timer(t, interval);
  2301. @@ -1316,7 +1304,7 @@
  2302. int cpu;
  2303. for_each_online_cpu(cpu)
  2304. - del_timer_sync(&per_cpu(mce_timer, cpu));
  2305. + hrtimer_cancel(&per_cpu(mce_timer, cpu));
  2306. }
  2307. static void mce_do_trigger(struct work_struct *work)
  2308. @@ -1326,6 +1314,56 @@
  2309. static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
  2310. +static void __mce_notify_work(struct swork_event *event)
  2311. +{
  2312. + /* Not more than two messages every minute */
  2313. + static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
  2314. +
  2315. + /* wake processes polling /dev/mcelog */
  2316. + wake_up_interruptible(&mce_chrdev_wait);
  2317. +
  2318. + /*
  2319. + * There is no risk of missing notifications because
  2320. + * work_pending is always cleared before the function is
  2321. + * executed.
  2322. + */
  2323. + if (mce_helper[0] && !work_pending(&mce_trigger_work))
  2324. + schedule_work(&mce_trigger_work);
  2325. +
  2326. + if (__ratelimit(&ratelimit))
  2327. + pr_info(HW_ERR "Machine check events logged\n");
  2328. +}
  2329. +
  2330. +#ifdef CONFIG_PREEMPT_RT_FULL
  2331. +static bool notify_work_ready __read_mostly;
  2332. +static struct swork_event notify_work;
  2333. +
  2334. +static int mce_notify_work_init(void)
  2335. +{
  2336. + int err;
  2337. +
  2338. + err = swork_get();
  2339. + if (err)
  2340. + return err;
  2341. +
  2342. + INIT_SWORK(&notify_work, __mce_notify_work);
  2343. + notify_work_ready = true;
  2344. + return 0;
  2345. +}
  2346. +
  2347. +static void mce_notify_work(void)
  2348. +{
  2349. + if (notify_work_ready)
  2350. + swork_queue(&notify_work);
  2351. +}
  2352. +#else
  2353. +static void mce_notify_work(void)
  2354. +{
  2355. + __mce_notify_work(NULL);
  2356. +}
  2357. +static inline int mce_notify_work_init(void) { return 0; }
  2358. +#endif
  2359. +
  2360. /*
  2361. * Notify the user(s) about new machine check events.
  2362. * Can be called from interrupt context, but not from machine check/NMI
  2363. @@ -1333,19 +1371,8 @@
  2364. */
  2365. int mce_notify_irq(void)
  2366. {
  2367. - /* Not more than two messages every minute */
  2368. - static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
  2369. -
  2370. if (test_and_clear_bit(0, &mce_need_notify)) {
  2371. - /* wake processes polling /dev/mcelog */
  2372. - wake_up_interruptible(&mce_chrdev_wait);
  2373. -
  2374. - if (mce_helper[0])
  2375. - schedule_work(&mce_trigger_work);
  2376. -
  2377. - if (__ratelimit(&ratelimit))
  2378. - pr_info(HW_ERR "Machine check events logged\n");
  2379. -
  2380. + mce_notify_work();
  2381. return 1;
  2382. }
  2383. return 0;
  2384. @@ -1639,7 +1666,7 @@
  2385. }
  2386. }
  2387. -static void mce_start_timer(unsigned int cpu, struct timer_list *t)
  2388. +static void mce_start_timer(unsigned int cpu, struct hrtimer *t)
  2389. {
  2390. unsigned long iv = check_interval * HZ;
  2391. @@ -1648,16 +1675,17 @@
  2392. per_cpu(mce_next_interval, cpu) = iv;
  2393. - t->expires = round_jiffies(jiffies + iv);
  2394. - add_timer_on(t, cpu);
  2395. + hrtimer_start_range_ns(t, ns_to_ktime(jiffies_to_usecs(iv) * 1000ULL),
  2396. + 0, HRTIMER_MODE_REL_PINNED);
  2397. }
  2398. static void __mcheck_cpu_init_timer(void)
  2399. {
  2400. - struct timer_list *t = this_cpu_ptr(&mce_timer);
  2401. + struct hrtimer *t = this_cpu_ptr(&mce_timer);
  2402. unsigned int cpu = smp_processor_id();
  2403. - setup_timer(t, mce_timer_fn, cpu);
  2404. + hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  2405. + t->function = mce_timer_fn;
  2406. mce_start_timer(cpu, t);
  2407. }
  2408. @@ -2376,6 +2404,8 @@
  2409. if (!mce_available(raw_cpu_ptr(&cpu_info)))
  2410. return;
  2411. + hrtimer_cancel(this_cpu_ptr(&mce_timer));
  2412. +
  2413. if (!(action & CPU_TASKS_FROZEN))
  2414. cmci_clear();
  2415. @@ -2398,6 +2428,7 @@
  2416. if (b->init)
  2417. wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
  2418. }
  2419. + __mcheck_cpu_init_timer();
  2420. }
  2421. /* Get notified when a cpu comes on/off. Be hotplug friendly. */
  2422. @@ -2405,7 +2436,6 @@
  2423. mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
  2424. {
  2425. unsigned int cpu = (unsigned long)hcpu;
  2426. - struct timer_list *t = &per_cpu(mce_timer, cpu);
  2427. switch (action & ~CPU_TASKS_FROZEN) {
  2428. case CPU_ONLINE:
  2429. @@ -2425,11 +2455,9 @@
  2430. break;
  2431. case CPU_DOWN_PREPARE:
  2432. smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
  2433. - del_timer_sync(t);
  2434. break;
  2435. case CPU_DOWN_FAILED:
  2436. smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
  2437. - mce_start_timer(cpu, t);
  2438. break;
  2439. }
  2440. @@ -2468,6 +2496,10 @@
  2441. goto err_out;
  2442. }
  2443. + err = mce_notify_work_init();
  2444. + if (err)
  2445. + goto err_out;
  2446. +
  2447. if (!zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL)) {
  2448. err = -ENOMEM;
  2449. goto err_out;
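
The mce_timer rework above is a standard timer_list -> hrtimer conversion: the callback pushes its own expiry forward with hrtimer_forward_now() and returns HRTIMER_RESTART instead of re-arming a jiffies timer. The bare pattern, separate from the MCE specifics and with illustrative names:

	#include <linux/hrtimer.h>
	#include <linux/ktime.h>

	static struct hrtimer example_timer;

	static enum hrtimer_restart example_timer_fn(struct hrtimer *t)
	{
		/* do the periodic work, then push the expiry forward */
		hrtimer_forward_now(t, ms_to_ktime(500));
		return HRTIMER_RESTART;		/* fire again in 500 ms */
	}

	static void example_timer_start(void)
	{
		hrtimer_init(&example_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
		example_timer.function = example_timer_fn;
		hrtimer_start(&example_timer, ms_to_ktime(500), HRTIMER_MODE_REL);
	}
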
  2450. diff -Nur linux-4.4.62.orig/arch/x86/kernel/cpu/perf_event_intel_rapl.c linux-4.4.62/arch/x86/kernel/cpu/perf_event_intel_rapl.c
  2451. --- linux-4.4.62.orig/arch/x86/kernel/cpu/perf_event_intel_rapl.c 2017-04-18 07:15:37.000000000 +0200
  2452. +++ linux-4.4.62/arch/x86/kernel/cpu/perf_event_intel_rapl.c 2017-04-18 17:38:08.034643040 +0200
  2453. @@ -117,7 +117,7 @@
  2454. };
  2455. struct rapl_pmu {
  2456. - spinlock_t lock;
  2457. + raw_spinlock_t lock;
  2458. int n_active; /* number of active events */
  2459. struct list_head active_list;
  2460. struct pmu *pmu; /* pointer to rapl_pmu_class */
  2461. @@ -220,13 +220,13 @@
  2462. if (!pmu->n_active)
  2463. return HRTIMER_NORESTART;
  2464. - spin_lock_irqsave(&pmu->lock, flags);
  2465. + raw_spin_lock_irqsave(&pmu->lock, flags);
  2466. list_for_each_entry(event, &pmu->active_list, active_entry) {
  2467. rapl_event_update(event);
  2468. }
  2469. - spin_unlock_irqrestore(&pmu->lock, flags);
  2470. + raw_spin_unlock_irqrestore(&pmu->lock, flags);
  2471. hrtimer_forward_now(hrtimer, pmu->timer_interval);
  2472. @@ -263,9 +263,9 @@
  2473. struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
  2474. unsigned long flags;
  2475. - spin_lock_irqsave(&pmu->lock, flags);
  2476. + raw_spin_lock_irqsave(&pmu->lock, flags);
  2477. __rapl_pmu_event_start(pmu, event);
  2478. - spin_unlock_irqrestore(&pmu->lock, flags);
  2479. + raw_spin_unlock_irqrestore(&pmu->lock, flags);
  2480. }
  2481. static void rapl_pmu_event_stop(struct perf_event *event, int mode)
  2482. @@ -274,7 +274,7 @@
  2483. struct hw_perf_event *hwc = &event->hw;
  2484. unsigned long flags;
  2485. - spin_lock_irqsave(&pmu->lock, flags);
  2486. + raw_spin_lock_irqsave(&pmu->lock, flags);
  2487. /* mark event as deactivated and stopped */
  2488. if (!(hwc->state & PERF_HES_STOPPED)) {
  2489. @@ -299,7 +299,7 @@
  2490. hwc->state |= PERF_HES_UPTODATE;
  2491. }
  2492. - spin_unlock_irqrestore(&pmu->lock, flags);
  2493. + raw_spin_unlock_irqrestore(&pmu->lock, flags);
  2494. }
  2495. static int rapl_pmu_event_add(struct perf_event *event, int mode)
  2496. @@ -308,14 +308,14 @@
  2497. struct hw_perf_event *hwc = &event->hw;
  2498. unsigned long flags;
  2499. - spin_lock_irqsave(&pmu->lock, flags);
  2500. + raw_spin_lock_irqsave(&pmu->lock, flags);
  2501. hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
  2502. if (mode & PERF_EF_START)
  2503. __rapl_pmu_event_start(pmu, event);
  2504. - spin_unlock_irqrestore(&pmu->lock, flags);
  2505. + raw_spin_unlock_irqrestore(&pmu->lock, flags);
  2506. return 0;
  2507. }
  2508. @@ -603,7 +603,7 @@
  2509. pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
  2510. if (!pmu)
  2511. return -1;
  2512. - spin_lock_init(&pmu->lock);
  2513. + raw_spin_lock_init(&pmu->lock);
  2514. INIT_LIST_HEAD(&pmu->active_list);
  2515. diff -Nur linux-4.4.62.orig/arch/x86/kernel/dumpstack_32.c linux-4.4.62/arch/x86/kernel/dumpstack_32.c
  2516. --- linux-4.4.62.orig/arch/x86/kernel/dumpstack_32.c 2017-04-18 07:15:37.000000000 +0200
  2517. +++ linux-4.4.62/arch/x86/kernel/dumpstack_32.c 2017-04-18 17:38:08.034643040 +0200
  2518. @@ -42,7 +42,7 @@
  2519. unsigned long *stack, unsigned long bp,
  2520. const struct stacktrace_ops *ops, void *data)
  2521. {
  2522. - const unsigned cpu = get_cpu();
  2523. + const unsigned cpu = get_cpu_light();
  2524. int graph = 0;
  2525. u32 *prev_esp;
  2526. @@ -86,7 +86,7 @@
  2527. break;
  2528. touch_nmi_watchdog();
  2529. }
  2530. - put_cpu();
  2531. + put_cpu_light();
  2532. }
  2533. EXPORT_SYMBOL(dump_trace);
  2534. diff -Nur linux-4.4.62.orig/arch/x86/kernel/dumpstack_64.c linux-4.4.62/arch/x86/kernel/dumpstack_64.c
  2535. --- linux-4.4.62.orig/arch/x86/kernel/dumpstack_64.c 2017-04-18 07:15:37.000000000 +0200
  2536. +++ linux-4.4.62/arch/x86/kernel/dumpstack_64.c 2017-04-18 17:38:08.034643040 +0200
  2537. @@ -152,7 +152,7 @@
  2538. unsigned long *stack, unsigned long bp,
  2539. const struct stacktrace_ops *ops, void *data)
  2540. {
  2541. - const unsigned cpu = get_cpu();
  2542. + const unsigned cpu = get_cpu_light();
  2543. struct thread_info *tinfo;
  2544. unsigned long *irq_stack = (unsigned long *)per_cpu(irq_stack_ptr, cpu);
  2545. unsigned long dummy;
  2546. @@ -241,7 +241,7 @@
  2547. * This handles the process stack:
  2548. */
  2549. bp = ops->walk_stack(tinfo, stack, bp, ops, data, NULL, &graph);
  2550. - put_cpu();
  2551. + put_cpu_light();
  2552. }
  2553. EXPORT_SYMBOL(dump_trace);
  2554. @@ -255,7 +255,7 @@
  2555. int cpu;
  2556. int i;
  2557. - preempt_disable();
  2558. + migrate_disable();
  2559. cpu = smp_processor_id();
  2560. irq_stack_end = (unsigned long *)(per_cpu(irq_stack_ptr, cpu));
  2561. @@ -291,7 +291,7 @@
  2562. pr_cont(" %016lx", *stack++);
  2563. touch_nmi_watchdog();
  2564. }
  2565. - preempt_enable();
  2566. + migrate_enable();
  2567. pr_cont("\n");
  2568. show_trace_log_lvl(task, regs, sp, bp, log_lvl);
  2569. diff -Nur linux-4.4.62.orig/arch/x86/kernel/irq_32.c linux-4.4.62/arch/x86/kernel/irq_32.c
  2570. --- linux-4.4.62.orig/arch/x86/kernel/irq_32.c 2017-04-18 07:15:37.000000000 +0200
  2571. +++ linux-4.4.62/arch/x86/kernel/irq_32.c 2017-04-18 17:38:08.034643040 +0200
  2572. @@ -128,6 +128,7 @@
  2573. cpu, per_cpu(hardirq_stack, cpu), per_cpu(softirq_stack, cpu));
  2574. }
  2575. +#ifndef CONFIG_PREEMPT_RT_FULL
  2576. void do_softirq_own_stack(void)
  2577. {
  2578. struct thread_info *curstk;
  2579. @@ -146,6 +147,7 @@
  2580. call_on_stack(__do_softirq, isp);
  2581. }
  2582. +#endif
  2583. bool handle_irq(struct irq_desc *desc, struct pt_regs *regs)
  2584. {
  2585. diff -Nur linux-4.4.62.orig/arch/x86/kernel/kvm.c linux-4.4.62/arch/x86/kernel/kvm.c
  2586. --- linux-4.4.62.orig/arch/x86/kernel/kvm.c 2017-04-18 07:15:37.000000000 +0200
  2587. +++ linux-4.4.62/arch/x86/kernel/kvm.c 2017-04-18 17:38:08.034643040 +0200
  2588. @@ -36,6 +36,7 @@
  2589. #include <linux/kprobes.h>
  2590. #include <linux/debugfs.h>
  2591. #include <linux/nmi.h>
  2592. +#include <linux/swait.h>
  2593. #include <asm/timer.h>
  2594. #include <asm/cpu.h>
  2595. #include <asm/traps.h>
  2596. @@ -91,14 +92,14 @@
  2597. struct kvm_task_sleep_node {
  2598. struct hlist_node link;
  2599. - wait_queue_head_t wq;
  2600. + struct swait_queue_head wq;
  2601. u32 token;
  2602. int cpu;
  2603. bool halted;
  2604. };
  2605. static struct kvm_task_sleep_head {
  2606. - spinlock_t lock;
  2607. + raw_spinlock_t lock;
  2608. struct hlist_head list;
  2609. } async_pf_sleepers[KVM_TASK_SLEEP_HASHSIZE];
  2610. @@ -122,17 +123,17 @@
  2611. u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
  2612. struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
  2613. struct kvm_task_sleep_node n, *e;
  2614. - DEFINE_WAIT(wait);
  2615. + DECLARE_SWAITQUEUE(wait);
  2616. rcu_irq_enter();
  2617. - spin_lock(&b->lock);
  2618. + raw_spin_lock(&b->lock);
  2619. e = _find_apf_task(b, token);
  2620. if (e) {
  2621. /* dummy entry exist -> wake up was delivered ahead of PF */
  2622. hlist_del(&e->link);
  2623. kfree(e);
  2624. - spin_unlock(&b->lock);
  2625. + raw_spin_unlock(&b->lock);
  2626. rcu_irq_exit();
  2627. return;
  2628. @@ -141,13 +142,13 @@
  2629. n.token = token;
  2630. n.cpu = smp_processor_id();
  2631. n.halted = is_idle_task(current) || preempt_count() > 1;
  2632. - init_waitqueue_head(&n.wq);
  2633. + init_swait_queue_head(&n.wq);
  2634. hlist_add_head(&n.link, &b->list);
  2635. - spin_unlock(&b->lock);
  2636. + raw_spin_unlock(&b->lock);
  2637. for (;;) {
  2638. if (!n.halted)
  2639. - prepare_to_wait(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
  2640. + prepare_to_swait(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
  2641. if (hlist_unhashed(&n.link))
  2642. break;
  2643. @@ -166,7 +167,7 @@
  2644. }
  2645. }
  2646. if (!n.halted)
  2647. - finish_wait(&n.wq, &wait);
  2648. + finish_swait(&n.wq, &wait);
  2649. rcu_irq_exit();
  2650. return;
  2651. @@ -178,8 +179,8 @@
  2652. hlist_del_init(&n->link);
  2653. if (n->halted)
  2654. smp_send_reschedule(n->cpu);
  2655. - else if (waitqueue_active(&n->wq))
  2656. - wake_up(&n->wq);
  2657. + else if (swait_active(&n->wq))
  2658. + swake_up(&n->wq);
  2659. }
  2660. static void apf_task_wake_all(void)
  2661. @@ -189,14 +190,14 @@
  2662. for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) {
  2663. struct hlist_node *p, *next;
  2664. struct kvm_task_sleep_head *b = &async_pf_sleepers[i];
  2665. - spin_lock(&b->lock);
  2666. + raw_spin_lock(&b->lock);
  2667. hlist_for_each_safe(p, next, &b->list) {
  2668. struct kvm_task_sleep_node *n =
  2669. hlist_entry(p, typeof(*n), link);
  2670. if (n->cpu == smp_processor_id())
  2671. apf_task_wake_one(n);
  2672. }
  2673. - spin_unlock(&b->lock);
  2674. + raw_spin_unlock(&b->lock);
  2675. }
  2676. }
  2677. @@ -212,7 +213,7 @@
  2678. }
  2679. again:
  2680. - spin_lock(&b->lock);
  2681. + raw_spin_lock(&b->lock);
  2682. n = _find_apf_task(b, token);
  2683. if (!n) {
  2684. /*
  2685. @@ -225,17 +226,17 @@
  2686. * Allocation failed! Busy wait while other cpu
  2687. * handles async PF.
  2688. */
  2689. - spin_unlock(&b->lock);
  2690. + raw_spin_unlock(&b->lock);
  2691. cpu_relax();
  2692. goto again;
  2693. }
  2694. n->token = token;
  2695. n->cpu = smp_processor_id();
  2696. - init_waitqueue_head(&n->wq);
  2697. + init_swait_queue_head(&n->wq);
  2698. hlist_add_head(&n->link, &b->list);
  2699. } else
  2700. apf_task_wake_one(n);
  2701. - spin_unlock(&b->lock);
  2702. + raw_spin_unlock(&b->lock);
  2703. return;
  2704. }
  2705. EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake);
  2706. @@ -486,7 +487,7 @@
  2707. paravirt_ops_setup();
  2708. register_reboot_notifier(&kvm_pv_reboot_nb);
  2709. for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
  2710. - spin_lock_init(&async_pf_sleepers[i].lock);
  2711. + raw_spin_lock_init(&async_pf_sleepers[i].lock);
  2712. if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF))
  2713. x86_init.irqs.trap_init = kvm_apf_trap_init;
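
kvm.c above moves the async-PF sleepers from wait_queue_head_t to the simple-wait (swait) API, whose wake side is cheap enough to be called under a raw spinlock with interrupts off. The basic wait/wake pairing, reduced to a sketch with invented names (real users protect the condition with their own lock or barriers):

	#include <linux/swait.h>

	static DECLARE_SWAIT_QUEUE_HEAD(example_wq);
	static bool example_done;

	/* waiter: sleeps until example_done becomes true */
	static void example_wait(void)
	{
		swait_event(example_wq, example_done);
	}

	/* waker: may run with a raw spinlock held / IRQs off */
	static void example_complete(void)
	{
		example_done = true;
		swake_up(&example_wq);
	}
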
  2714. diff -Nur linux-4.4.62.orig/arch/x86/kernel/nmi.c linux-4.4.62/arch/x86/kernel/nmi.c
  2715. --- linux-4.4.62.orig/arch/x86/kernel/nmi.c 2017-04-18 07:15:37.000000000 +0200
  2716. +++ linux-4.4.62/arch/x86/kernel/nmi.c 2017-04-18 17:38:08.034643040 +0200
  2717. @@ -231,7 +231,7 @@
  2718. #endif
  2719. if (panic_on_unrecovered_nmi)
  2720. - panic("NMI: Not continuing");
  2721. + nmi_panic(regs, "NMI: Not continuing");
  2722. pr_emerg("Dazed and confused, but trying to continue\n");
  2723. @@ -255,8 +255,16 @@
  2724. reason, smp_processor_id());
  2725. show_regs(regs);
  2726. - if (panic_on_io_nmi)
  2727. - panic("NMI IOCK error: Not continuing");
  2728. + if (panic_on_io_nmi) {
  2729. + nmi_panic(regs, "NMI IOCK error: Not continuing");
  2730. +
  2731. + /*
  2732. + * If we end up here, it means we have received an NMI while
  2733. + * processing panic(). Simply return without delaying and
  2734. + * re-enabling NMIs.
  2735. + */
  2736. + return;
  2737. + }
  2738. /* Re-enable the IOCK line, wait for a few seconds */
  2739. reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
  2740. @@ -297,7 +305,7 @@
  2741. pr_emerg("Do you have a strange power saving mode enabled?\n");
  2742. if (unknown_nmi_panic || panic_on_unrecovered_nmi)
  2743. - panic("NMI: Not continuing");
  2744. + nmi_panic(regs, "NMI: Not continuing");
  2745. pr_emerg("Dazed and confused, but trying to continue\n");
  2746. }
  2747. diff -Nur linux-4.4.62.orig/arch/x86/kernel/process_32.c linux-4.4.62/arch/x86/kernel/process_32.c
  2748. --- linux-4.4.62.orig/arch/x86/kernel/process_32.c 2017-04-18 07:15:37.000000000 +0200
  2749. +++ linux-4.4.62/arch/x86/kernel/process_32.c 2017-04-18 17:38:08.034643040 +0200
  2750. @@ -35,6 +35,7 @@
  2751. #include <linux/uaccess.h>
  2752. #include <linux/io.h>
  2753. #include <linux/kdebug.h>
  2754. +#include <linux/highmem.h>
  2755. #include <asm/pgtable.h>
  2756. #include <asm/ldt.h>
  2757. @@ -210,6 +211,35 @@
  2758. }
  2759. EXPORT_SYMBOL_GPL(start_thread);
  2760. +#ifdef CONFIG_PREEMPT_RT_FULL
  2761. +static void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p)
  2762. +{
  2763. + int i;
  2764. +
  2765. + /*
  2766. + * Clear @prev's kmap_atomic mappings
  2767. + */
  2768. + for (i = 0; i < prev_p->kmap_idx; i++) {
  2769. + int idx = i + KM_TYPE_NR * smp_processor_id();
  2770. + pte_t *ptep = kmap_pte - idx;
  2771. +
  2772. + kpte_clear_flush(ptep, __fix_to_virt(FIX_KMAP_BEGIN + idx));
  2773. + }
  2774. + /*
  2775. + * Restore @next_p's kmap_atomic mappings
  2776. + */
  2777. + for (i = 0; i < next_p->kmap_idx; i++) {
  2778. + int idx = i + KM_TYPE_NR * smp_processor_id();
  2779. +
  2780. + if (!pte_none(next_p->kmap_pte[i]))
  2781. + set_pte(kmap_pte - idx, next_p->kmap_pte[i]);
  2782. + }
  2783. +}
  2784. +#else
  2785. +static inline void
  2786. +switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { }
  2787. +#endif
  2788. +
  2789. /*
  2790. * switch_to(x,y) should switch tasks from x to y.
  2791. @@ -286,6 +316,8 @@
  2792. task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
  2793. __switch_to_xtra(prev_p, next_p, tss);
  2794. + switch_kmaps(prev_p, next_p);
  2795. +
  2796. /*
  2797. * Leave lazy mode, flushing any hypercalls made here.
  2798. * This must be done before restoring TLS segments so
  2799. diff -Nur linux-4.4.62.orig/arch/x86/kernel/reboot.c linux-4.4.62/arch/x86/kernel/reboot.c
  2800. --- linux-4.4.62.orig/arch/x86/kernel/reboot.c 2017-04-18 07:15:37.000000000 +0200
  2801. +++ linux-4.4.62/arch/x86/kernel/reboot.c 2017-04-18 17:38:08.034643040 +0200
  2802. @@ -726,6 +726,7 @@
  2803. static nmi_shootdown_cb shootdown_callback;
  2804. static atomic_t waiting_for_crash_ipi;
  2805. +static int crash_ipi_issued;
  2806. static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
  2807. {
  2808. @@ -788,6 +789,9 @@
  2809. smp_send_nmi_allbutself();
  2810. + /* Kick CPUs looping in NMI context. */
  2811. + WRITE_ONCE(crash_ipi_issued, 1);
  2812. +
  2813. msecs = 1000; /* Wait at most a second for the other cpus to stop */
  2814. while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
  2815. mdelay(1);
  2816. @@ -796,6 +800,22 @@
  2817. /* Leave the nmi callback set */
  2818. }
  2819. +
  2820. +/* Override the weak function in kernel/panic.c */
  2821. +void nmi_panic_self_stop(struct pt_regs *regs)
  2822. +{
  2823. + while (1) {
  2824. + /*
  2825. + * Wait for the crash dumping IPI to be issued, and then
  2826. + * call its callback directly.
  2827. + */
  2828. + if (READ_ONCE(crash_ipi_issued))
  2829. + crash_nmi_callback(0, regs); /* Don't return */
  2830. +
  2831. + cpu_relax();
  2832. + }
  2833. +}
  2834. +
  2835. #else /* !CONFIG_SMP */
  2836. void nmi_shootdown_cpus(nmi_shootdown_cb callback)
  2837. {
  2838. diff -Nur linux-4.4.62.orig/arch/x86/kvm/lapic.c linux-4.4.62/arch/x86/kvm/lapic.c
  2839. --- linux-4.4.62.orig/arch/x86/kvm/lapic.c 2017-04-18 07:15:37.000000000 +0200
  2840. +++ linux-4.4.62/arch/x86/kvm/lapic.c 2017-04-18 17:38:08.038643196 +0200
  2841. @@ -1195,7 +1195,7 @@
  2842. static void apic_timer_expired(struct kvm_lapic *apic)
  2843. {
  2844. struct kvm_vcpu *vcpu = apic->vcpu;
  2845. - wait_queue_head_t *q = &vcpu->wq;
  2846. + struct swait_queue_head *q = &vcpu->wq;
  2847. struct kvm_timer *ktimer = &apic->lapic_timer;
  2848. if (atomic_read(&apic->lapic_timer.pending))
  2849. @@ -1204,8 +1204,8 @@
  2850. atomic_inc(&apic->lapic_timer.pending);
  2851. kvm_set_pending_timer(vcpu);
  2852. - if (waitqueue_active(q))
  2853. - wake_up_interruptible(q);
  2854. + if (swait_active(q))
  2855. + swake_up(q);
  2856. if (apic_lvtt_tscdeadline(apic))
  2857. ktimer->expired_tscdeadline = ktimer->tscdeadline;
  2858. @@ -1801,6 +1801,7 @@
  2859. hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
  2860. HRTIMER_MODE_ABS);
  2861. apic->lapic_timer.timer.function = apic_timer_fn;
  2862. + apic->lapic_timer.timer.irqsafe = 1;
  2863. /*
  2864. * APIC is created enabled. This will prevent kvm_lapic_set_base from
  2865. diff -Nur linux-4.4.62.orig/arch/x86/kvm/x86.c linux-4.4.62/arch/x86/kvm/x86.c
  2866. --- linux-4.4.62.orig/arch/x86/kvm/x86.c 2017-04-18 07:15:37.000000000 +0200
  2867. +++ linux-4.4.62/arch/x86/kvm/x86.c 2017-04-18 17:38:08.038643196 +0200
  2868. @@ -5810,6 +5810,13 @@
  2869. goto out;
  2870. }
  2871. +#ifdef CONFIG_PREEMPT_RT_FULL
  2872. + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
  2873. + printk(KERN_ERR "RT requires X86_FEATURE_CONSTANT_TSC\n");
  2874. + return -EOPNOTSUPP;
  2875. + }
  2876. +#endif
  2877. +
  2878. r = kvm_mmu_module_init();
  2879. if (r)
  2880. goto out_free_percpu;
  2881. diff -Nur linux-4.4.62.orig/arch/x86/mm/highmem_32.c linux-4.4.62/arch/x86/mm/highmem_32.c
  2882. --- linux-4.4.62.orig/arch/x86/mm/highmem_32.c 2017-04-18 07:15:37.000000000 +0200
  2883. +++ linux-4.4.62/arch/x86/mm/highmem_32.c 2017-04-18 17:38:08.038643196 +0200
  2884. @@ -32,10 +32,11 @@
  2885. */
  2886. void *kmap_atomic_prot(struct page *page, pgprot_t prot)
  2887. {
  2888. + pte_t pte = mk_pte(page, prot);
  2889. unsigned long vaddr;
  2890. int idx, type;
  2891. - preempt_disable();
  2892. + preempt_disable_nort();
  2893. pagefault_disable();
  2894. if (!PageHighMem(page))
  2895. @@ -45,7 +46,10 @@
  2896. idx = type + KM_TYPE_NR*smp_processor_id();
  2897. vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
  2898. BUG_ON(!pte_none(*(kmap_pte-idx)));
  2899. - set_pte(kmap_pte-idx, mk_pte(page, prot));
  2900. +#ifdef CONFIG_PREEMPT_RT_FULL
  2901. + current->kmap_pte[type] = pte;
  2902. +#endif
  2903. + set_pte(kmap_pte-idx, pte);
  2904. arch_flush_lazy_mmu_mode();
  2905. return (void *)vaddr;
  2906. @@ -88,6 +92,9 @@
  2907. * is a bad idea also, in case the page changes cacheability
  2908. * attributes or becomes a protected page in a hypervisor.
  2909. */
  2910. +#ifdef CONFIG_PREEMPT_RT_FULL
  2911. + current->kmap_pte[type] = __pte(0);
  2912. +#endif
  2913. kpte_clear_flush(kmap_pte-idx, vaddr);
  2914. kmap_atomic_idx_pop();
  2915. arch_flush_lazy_mmu_mode();
  2916. @@ -100,7 +107,7 @@
  2917. #endif
  2918. pagefault_enable();
  2919. - preempt_enable();
  2920. + preempt_enable_nort();
  2921. }
  2922. EXPORT_SYMBOL(__kunmap_atomic);
  2923. diff -Nur linux-4.4.62.orig/arch/x86/mm/iomap_32.c linux-4.4.62/arch/x86/mm/iomap_32.c
  2924. --- linux-4.4.62.orig/arch/x86/mm/iomap_32.c 2017-04-18 07:15:37.000000000 +0200
  2925. +++ linux-4.4.62/arch/x86/mm/iomap_32.c 2017-04-18 17:38:08.038643196 +0200
  2926. @@ -56,6 +56,7 @@
  2927. void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot)
  2928. {
  2929. + pte_t pte = pfn_pte(pfn, prot);
  2930. unsigned long vaddr;
  2931. int idx, type;
  2932. @@ -65,7 +66,12 @@
  2933. type = kmap_atomic_idx_push();
  2934. idx = type + KM_TYPE_NR * smp_processor_id();
  2935. vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
  2936. - set_pte(kmap_pte - idx, pfn_pte(pfn, prot));
  2937. + WARN_ON(!pte_none(*(kmap_pte - idx)));
  2938. +
  2939. +#ifdef CONFIG_PREEMPT_RT_FULL
  2940. + current->kmap_pte[type] = pte;
  2941. +#endif
  2942. + set_pte(kmap_pte - idx, pte);
  2943. arch_flush_lazy_mmu_mode();
  2944. return (void *)vaddr;
  2945. @@ -113,6 +119,9 @@
  2946. * is a bad idea also, in case the page changes cacheability
  2947. * attributes or becomes a protected page in a hypervisor.
  2948. */
  2949. +#ifdef CONFIG_PREEMPT_RT_FULL
  2950. + current->kmap_pte[type] = __pte(0);
  2951. +#endif
  2952. kpte_clear_flush(kmap_pte-idx, vaddr);
  2953. kmap_atomic_idx_pop();
  2954. }
  2955. diff -Nur linux-4.4.62.orig/arch/x86/mm/pageattr.c linux-4.4.62/arch/x86/mm/pageattr.c
  2956. --- linux-4.4.62.orig/arch/x86/mm/pageattr.c 2017-04-18 07:15:37.000000000 +0200
  2957. +++ linux-4.4.62/arch/x86/mm/pageattr.c 2017-04-18 17:38:08.038643196 +0200
  2958. @@ -208,7 +208,15 @@
  2959. int in_flags, struct page **pages)
  2960. {
  2961. unsigned int i, level;
  2962. +#ifdef CONFIG_PREEMPT
  2963. + /*
  2964. + * Avoid wbinvd() because it causes latencies on all CPUs,
  2965. + * regardless of any CPU isolation that may be in effect.
  2966. + */
  2967. + unsigned long do_wbinvd = 0;
  2968. +#else
  2969. unsigned long do_wbinvd = cache && numpages >= 1024; /* 4M threshold */
  2970. +#endif
  2971. BUG_ON(irqs_disabled());
  2972. diff -Nur linux-4.4.62.orig/arch/x86/platform/uv/tlb_uv.c linux-4.4.62/arch/x86/platform/uv/tlb_uv.c
  2973. --- linux-4.4.62.orig/arch/x86/platform/uv/tlb_uv.c 2017-04-18 07:15:37.000000000 +0200
  2974. +++ linux-4.4.62/arch/x86/platform/uv/tlb_uv.c 2017-04-18 17:38:08.038643196 +0200
  2975. @@ -714,9 +714,9 @@
  2976. quiesce_local_uvhub(hmaster);
  2977. - spin_lock(&hmaster->queue_lock);
  2978. + raw_spin_lock(&hmaster->queue_lock);
  2979. reset_with_ipi(&bau_desc->distribution, bcp);
  2980. - spin_unlock(&hmaster->queue_lock);
  2981. + raw_spin_unlock(&hmaster->queue_lock);
  2982. end_uvhub_quiesce(hmaster);
  2983. @@ -736,9 +736,9 @@
  2984. quiesce_local_uvhub(hmaster);
  2985. - spin_lock(&hmaster->queue_lock);
  2986. + raw_spin_lock(&hmaster->queue_lock);
  2987. reset_with_ipi(&bau_desc->distribution, bcp);
  2988. - spin_unlock(&hmaster->queue_lock);
  2989. + raw_spin_unlock(&hmaster->queue_lock);
  2990. end_uvhub_quiesce(hmaster);
  2991. @@ -759,7 +759,7 @@
  2992. cycles_t tm1;
  2993. hmaster = bcp->uvhub_master;
  2994. - spin_lock(&hmaster->disable_lock);
  2995. + raw_spin_lock(&hmaster->disable_lock);
  2996. if (!bcp->baudisabled) {
  2997. stat->s_bau_disabled++;
  2998. tm1 = get_cycles();
  2999. @@ -772,7 +772,7 @@
  3000. }
  3001. }
  3002. }
  3003. - spin_unlock(&hmaster->disable_lock);
  3004. + raw_spin_unlock(&hmaster->disable_lock);
  3005. }
  3006. static void count_max_concurr(int stat, struct bau_control *bcp,
  3007. @@ -835,7 +835,7 @@
  3008. */
  3009. static void uv1_throttle(struct bau_control *hmaster, struct ptc_stats *stat)
  3010. {
  3011. - spinlock_t *lock = &hmaster->uvhub_lock;
  3012. + raw_spinlock_t *lock = &hmaster->uvhub_lock;
  3013. atomic_t *v;
  3014. v = &hmaster->active_descriptor_count;
  3015. @@ -968,7 +968,7 @@
  3016. struct bau_control *hmaster;
  3017. hmaster = bcp->uvhub_master;
  3018. - spin_lock(&hmaster->disable_lock);
  3019. + raw_spin_lock(&hmaster->disable_lock);
  3020. if (bcp->baudisabled && (get_cycles() >= bcp->set_bau_on_time)) {
  3021. stat->s_bau_reenabled++;
  3022. for_each_present_cpu(tcpu) {
  3023. @@ -980,10 +980,10 @@
  3024. tbcp->period_giveups = 0;
  3025. }
  3026. }
  3027. - spin_unlock(&hmaster->disable_lock);
  3028. + raw_spin_unlock(&hmaster->disable_lock);
  3029. return 0;
  3030. }
  3031. - spin_unlock(&hmaster->disable_lock);
  3032. + raw_spin_unlock(&hmaster->disable_lock);
  3033. return -1;
  3034. }
  3035. @@ -1901,9 +1901,9 @@
  3036. bcp->cong_reps = congested_reps;
  3037. bcp->disabled_period = sec_2_cycles(disabled_period);
  3038. bcp->giveup_limit = giveup_limit;
  3039. - spin_lock_init(&bcp->queue_lock);
  3040. - spin_lock_init(&bcp->uvhub_lock);
  3041. - spin_lock_init(&bcp->disable_lock);
  3042. + raw_spin_lock_init(&bcp->queue_lock);
  3043. + raw_spin_lock_init(&bcp->uvhub_lock);
  3044. + raw_spin_lock_init(&bcp->disable_lock);
  3045. }
  3046. }
  3047. diff -Nur linux-4.4.62.orig/arch/x86/platform/uv/uv_time.c linux-4.4.62/arch/x86/platform/uv/uv_time.c
  3048. --- linux-4.4.62.orig/arch/x86/platform/uv/uv_time.c 2017-04-18 07:15:37.000000000 +0200
  3049. +++ linux-4.4.62/arch/x86/platform/uv/uv_time.c 2017-04-18 17:38:08.038643196 +0200
  3050. @@ -57,7 +57,7 @@
  3051. /* There is one of these allocated per node */
  3052. struct uv_rtc_timer_head {
  3053. - spinlock_t lock;
  3054. + raw_spinlock_t lock;
  3055. /* next cpu waiting for timer, local node relative: */
  3056. int next_cpu;
  3057. /* number of cpus on this node: */
  3058. @@ -177,7 +177,7 @@
  3059. uv_rtc_deallocate_timers();
  3060. return -ENOMEM;
  3061. }
  3062. - spin_lock_init(&head->lock);
  3063. + raw_spin_lock_init(&head->lock);
  3064. head->ncpus = uv_blade_nr_possible_cpus(bid);
  3065. head->next_cpu = -1;
  3066. blade_info[bid] = head;
  3067. @@ -231,7 +231,7 @@
  3068. unsigned long flags;
  3069. int next_cpu;
  3070. - spin_lock_irqsave(&head->lock, flags);
  3071. + raw_spin_lock_irqsave(&head->lock, flags);
  3072. next_cpu = head->next_cpu;
  3073. *t = expires;
  3074. @@ -243,12 +243,12 @@
  3075. if (uv_setup_intr(cpu, expires)) {
  3076. *t = ULLONG_MAX;
  3077. uv_rtc_find_next_timer(head, pnode);
  3078. - spin_unlock_irqrestore(&head->lock, flags);
  3079. + raw_spin_unlock_irqrestore(&head->lock, flags);
  3080. return -ETIME;
  3081. }
  3082. }
  3083. - spin_unlock_irqrestore(&head->lock, flags);
  3084. + raw_spin_unlock_irqrestore(&head->lock, flags);
  3085. return 0;
  3086. }
  3087. @@ -267,7 +267,7 @@
  3088. unsigned long flags;
  3089. int rc = 0;
  3090. - spin_lock_irqsave(&head->lock, flags);
  3091. + raw_spin_lock_irqsave(&head->lock, flags);
  3092. if ((head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) || force)
  3093. rc = 1;
  3094. @@ -279,7 +279,7 @@
  3095. uv_rtc_find_next_timer(head, pnode);
  3096. }
  3097. - spin_unlock_irqrestore(&head->lock, flags);
  3098. + raw_spin_unlock_irqrestore(&head->lock, flags);
  3099. return rc;
  3100. }
  3101. @@ -299,13 +299,18 @@
  3102. static cycle_t uv_read_rtc(struct clocksource *cs)
  3103. {
  3104. unsigned long offset;
  3105. + cycle_t cycles;
  3106. + preempt_disable();
  3107. if (uv_get_min_hub_revision_id() == 1)
  3108. offset = 0;
  3109. else
  3110. offset = (uv_blade_processor_id() * L1_CACHE_BYTES) % PAGE_SIZE;
  3111. - return (cycle_t)uv_read_local_mmr(UVH_RTC | offset);
  3112. + cycles = (cycle_t)uv_read_local_mmr(UVH_RTC | offset);
  3113. + preempt_enable();
  3114. +
  3115. + return cycles;
  3116. }
  3117. /*
  3118. diff -Nur linux-4.4.62.orig/block/blk-core.c linux-4.4.62/block/blk-core.c
  3119. --- linux-4.4.62.orig/block/blk-core.c 2017-04-18 07:15:37.000000000 +0200
  3120. +++ linux-4.4.62/block/blk-core.c 2017-04-18 17:38:08.038643196 +0200
  3121. @@ -125,6 +125,9 @@
  3122. INIT_LIST_HEAD(&rq->queuelist);
  3123. INIT_LIST_HEAD(&rq->timeout_list);
  3124. +#ifdef CONFIG_PREEMPT_RT_FULL
  3125. + INIT_WORK(&rq->work, __blk_mq_complete_request_remote_work);
  3126. +#endif
  3127. rq->cpu = -1;
  3128. rq->q = q;
  3129. rq->__sector = (sector_t) -1;
  3130. @@ -233,7 +236,7 @@
  3131. **/
  3132. void blk_start_queue(struct request_queue *q)
  3133. {
  3134. - WARN_ON(!irqs_disabled());
  3135. + WARN_ON_NONRT(!irqs_disabled());
  3136. queue_flag_clear(QUEUE_FLAG_STOPPED, q);
  3137. __blk_run_queue(q);
  3138. @@ -659,7 +662,7 @@
  3139. if (!gfpflags_allow_blocking(gfp))
  3140. return -EBUSY;
  3141. - ret = wait_event_interruptible(q->mq_freeze_wq,
  3142. + ret = swait_event_interruptible(q->mq_freeze_wq,
  3143. !atomic_read(&q->mq_freeze_depth) ||
  3144. blk_queue_dying(q));
  3145. if (blk_queue_dying(q))
  3146. @@ -679,7 +682,7 @@
  3147. struct request_queue *q =
  3148. container_of(ref, struct request_queue, q_usage_counter);
  3149. - wake_up_all(&q->mq_freeze_wq);
  3150. + swake_up_all(&q->mq_freeze_wq);
  3151. }
  3152. struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
  3153. @@ -741,7 +744,7 @@
  3154. q->bypass_depth = 1;
  3155. __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);
  3156. - init_waitqueue_head(&q->mq_freeze_wq);
  3157. + init_swait_queue_head(&q->mq_freeze_wq);
  3158. /*
  3159. * Init percpu_ref in atomic mode so that it's faster to shutdown.
  3160. @@ -3222,7 +3225,7 @@
  3161. blk_run_queue_async(q);
  3162. else
  3163. __blk_run_queue(q);
  3164. - spin_unlock(q->queue_lock);
  3165. + spin_unlock_irq(q->queue_lock);
  3166. }
  3167. static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule)
  3168. @@ -3270,7 +3273,6 @@
  3169. void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
  3170. {
  3171. struct request_queue *q;
  3172. - unsigned long flags;
  3173. struct request *rq;
  3174. LIST_HEAD(list);
  3175. unsigned int depth;
  3176. @@ -3290,11 +3292,6 @@
  3177. q = NULL;
  3178. depth = 0;
  3179. - /*
  3180. - * Save and disable interrupts here, to avoid doing it for every
  3181. - * queue lock we have to take.
  3182. - */
  3183. - local_irq_save(flags);
  3184. while (!list_empty(&list)) {
  3185. rq = list_entry_rq(list.next);
  3186. list_del_init(&rq->queuelist);
  3187. @@ -3307,7 +3304,7 @@
  3188. queue_unplugged(q, depth, from_schedule);
  3189. q = rq->q;
  3190. depth = 0;
  3191. - spin_lock(q->queue_lock);
  3192. + spin_lock_irq(q->queue_lock);
  3193. }
  3194. /*
  3195. @@ -3334,8 +3331,6 @@
  3196. */
  3197. if (q)
  3198. queue_unplugged(q, depth, from_schedule);
  3199. -
  3200. - local_irq_restore(flags);
  3201. }
  3202. void blk_finish_plug(struct blk_plug *plug)
  3203. diff -Nur linux-4.4.62.orig/block/blk-ioc.c linux-4.4.62/block/blk-ioc.c
  3204. --- linux-4.4.62.orig/block/blk-ioc.c 2017-04-18 07:15:37.000000000 +0200
  3205. +++ linux-4.4.62/block/blk-ioc.c 2017-04-18 17:38:08.038643196 +0200
  3206. @@ -7,6 +7,7 @@
  3207. #include <linux/bio.h>
  3208. #include <linux/blkdev.h>
  3209. #include <linux/slab.h>
  3210. +#include <linux/delay.h>
  3211. #include "blk.h"
  3212. @@ -109,7 +110,7 @@
  3213. spin_unlock(q->queue_lock);
  3214. } else {
  3215. spin_unlock_irqrestore(&ioc->lock, flags);
  3216. - cpu_relax();
  3217. + cpu_chill();
  3218. spin_lock_irqsave_nested(&ioc->lock, flags, 1);
  3219. }
  3220. }
  3221. @@ -187,7 +188,7 @@
  3222. spin_unlock(icq->q->queue_lock);
  3223. } else {
  3224. spin_unlock_irqrestore(&ioc->lock, flags);
  3225. - cpu_relax();
  3226. + cpu_chill();
  3227. goto retry;
  3228. }
  3229. }
  3230. diff -Nur linux-4.4.62.orig/block/blk-iopoll.c linux-4.4.62/block/blk-iopoll.c
  3231. --- linux-4.4.62.orig/block/blk-iopoll.c 2017-04-18 07:15:37.000000000 +0200
  3232. +++ linux-4.4.62/block/blk-iopoll.c 2017-04-18 17:38:08.038643196 +0200
  3233. @@ -35,6 +35,7 @@
  3234. list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll));
  3235. __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
  3236. local_irq_restore(flags);
  3237. + preempt_check_resched_rt();
  3238. }
  3239. EXPORT_SYMBOL(blk_iopoll_sched);
  3240. @@ -132,6 +133,7 @@
  3241. __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
  3242. local_irq_enable();
  3243. + preempt_check_resched_rt();
  3244. }
  3245. /**
  3246. @@ -201,6 +203,7 @@
  3247. this_cpu_ptr(&blk_cpu_iopoll));
  3248. __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
  3249. local_irq_enable();
  3250. + preempt_check_resched_rt();
  3251. }
  3252. return NOTIFY_OK;
  3253. diff -Nur linux-4.4.62.orig/block/blk-mq.c linux-4.4.62/block/blk-mq.c
  3254. --- linux-4.4.62.orig/block/blk-mq.c 2017-04-18 07:15:37.000000000 +0200
  3255. +++ linux-4.4.62/block/blk-mq.c 2017-04-18 17:38:08.038643196 +0200
  3256. @@ -92,7 +92,7 @@
  3257. static void blk_mq_freeze_queue_wait(struct request_queue *q)
  3258. {
  3259. - wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter));
  3260. + swait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter));
  3261. }
  3262. /*
  3263. @@ -130,7 +130,7 @@
  3264. WARN_ON_ONCE(freeze_depth < 0);
  3265. if (!freeze_depth) {
  3266. percpu_ref_reinit(&q->q_usage_counter);
  3267. - wake_up_all(&q->mq_freeze_wq);
  3268. + swake_up_all(&q->mq_freeze_wq);
  3269. }
  3270. }
  3271. EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
  3272. @@ -149,7 +149,7 @@
  3273. * dying, we need to ensure that processes currently waiting on
  3274. * the queue are notified as well.
  3275. */
  3276. - wake_up_all(&q->mq_freeze_wq);
  3277. + swake_up_all(&q->mq_freeze_wq);
  3278. }
  3279. bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
  3280. @@ -196,6 +196,9 @@
  3281. rq->resid_len = 0;
  3282. rq->sense = NULL;
  3283. +#ifdef CONFIG_PREEMPT_RT_FULL
  3284. + INIT_WORK(&rq->work, __blk_mq_complete_request_remote_work);
  3285. +#endif
  3286. INIT_LIST_HEAD(&rq->timeout_list);
  3287. rq->timeout = 0;
  3288. @@ -325,6 +328,17 @@
  3289. }
  3290. EXPORT_SYMBOL(blk_mq_end_request);
  3291. +#ifdef CONFIG_PREEMPT_RT_FULL
  3292. +
  3293. +void __blk_mq_complete_request_remote_work(struct work_struct *work)
  3294. +{
  3295. + struct request *rq = container_of(work, struct request, work);
  3296. +
  3297. + rq->q->softirq_done_fn(rq);
  3298. +}
  3299. +
  3300. +#else
  3301. +
  3302. static void __blk_mq_complete_request_remote(void *data)
  3303. {
  3304. struct request *rq = data;
  3305. @@ -332,6 +346,8 @@
  3306. rq->q->softirq_done_fn(rq);
  3307. }
  3308. +#endif
  3309. +
  3310. static void blk_mq_ipi_complete_request(struct request *rq)
  3311. {
  3312. struct blk_mq_ctx *ctx = rq->mq_ctx;
  3313. @@ -343,19 +359,23 @@
  3314. return;
  3315. }
  3316. - cpu = get_cpu();
  3317. + cpu = get_cpu_light();
  3318. if (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags))
  3319. shared = cpus_share_cache(cpu, ctx->cpu);
  3320. if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) {
  3321. +#ifdef CONFIG_PREEMPT_RT_FULL
  3322. + schedule_work_on(ctx->cpu, &rq->work);
  3323. +#else
  3324. rq->csd.func = __blk_mq_complete_request_remote;
  3325. rq->csd.info = rq;
  3326. rq->csd.flags = 0;
  3327. smp_call_function_single_async(ctx->cpu, &rq->csd);
  3328. +#endif
  3329. } else {
  3330. rq->q->softirq_done_fn(rq);
  3331. }
  3332. - put_cpu();
  3333. + put_cpu_light();
  3334. }
  3335. static void __blk_mq_complete_request(struct request *rq)
  3336. @@ -862,14 +882,14 @@
  3337. return;
  3338. if (!async) {
  3339. - int cpu = get_cpu();
  3340. + int cpu = get_cpu_light();
  3341. if (cpumask_test_cpu(cpu, hctx->cpumask)) {
  3342. __blk_mq_run_hw_queue(hctx);
  3343. - put_cpu();
  3344. + put_cpu_light();
  3345. return;
  3346. }
  3347. - put_cpu();
  3348. + put_cpu_light();
  3349. }
  3350. kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
  3351. @@ -1616,7 +1636,7 @@
  3352. {
  3353. struct blk_mq_hw_ctx *hctx = data;
  3354. - if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
  3355. + if (action == CPU_POST_DEAD)
  3356. return blk_mq_hctx_cpu_offline(hctx, cpu);
  3357. /*
  3358. diff -Nur linux-4.4.62.orig/block/blk-mq-cpu.c linux-4.4.62/block/blk-mq-cpu.c
  3359. --- linux-4.4.62.orig/block/blk-mq-cpu.c 2017-04-18 07:15:37.000000000 +0200
  3360. +++ linux-4.4.62/block/blk-mq-cpu.c 2017-04-18 17:38:08.038643196 +0200
  3361. @@ -16,7 +16,7 @@
  3362. #include "blk-mq.h"
  3363. static LIST_HEAD(blk_mq_cpu_notify_list);
  3364. -static DEFINE_RAW_SPINLOCK(blk_mq_cpu_notify_lock);
  3365. +static DEFINE_SPINLOCK(blk_mq_cpu_notify_lock);
  3366. static int blk_mq_main_cpu_notify(struct notifier_block *self,
  3367. unsigned long action, void *hcpu)
  3368. @@ -25,7 +25,10 @@
  3369. struct blk_mq_cpu_notifier *notify;
  3370. int ret = NOTIFY_OK;
  3371. - raw_spin_lock(&blk_mq_cpu_notify_lock);
  3372. + if (action != CPU_POST_DEAD)
  3373. + return NOTIFY_OK;
  3374. +
  3375. + spin_lock(&blk_mq_cpu_notify_lock);
  3376. list_for_each_entry(notify, &blk_mq_cpu_notify_list, list) {
  3377. ret = notify->notify(notify->data, action, cpu);
  3378. @@ -33,7 +36,7 @@
  3379. break;
  3380. }
  3381. - raw_spin_unlock(&blk_mq_cpu_notify_lock);
  3382. + spin_unlock(&blk_mq_cpu_notify_lock);
  3383. return ret;
  3384. }
  3385. @@ -41,16 +44,16 @@
  3386. {
  3387. BUG_ON(!notifier->notify);
  3388. - raw_spin_lock(&blk_mq_cpu_notify_lock);
  3389. + spin_lock(&blk_mq_cpu_notify_lock);
  3390. list_add_tail(&notifier->list, &blk_mq_cpu_notify_list);
  3391. - raw_spin_unlock(&blk_mq_cpu_notify_lock);
  3392. + spin_unlock(&blk_mq_cpu_notify_lock);
  3393. }
  3394. void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier)
  3395. {
  3396. - raw_spin_lock(&blk_mq_cpu_notify_lock);
  3397. + spin_lock(&blk_mq_cpu_notify_lock);
  3398. list_del(&notifier->list);
  3399. - raw_spin_unlock(&blk_mq_cpu_notify_lock);
  3400. + spin_unlock(&blk_mq_cpu_notify_lock);
  3401. }
  3402. void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier,
  3403. diff -Nur linux-4.4.62.orig/block/blk-mq.h linux-4.4.62/block/blk-mq.h
  3404. --- linux-4.4.62.orig/block/blk-mq.h 2017-04-18 07:15:37.000000000 +0200
  3405. +++ linux-4.4.62/block/blk-mq.h 2017-04-18 17:38:08.038643196 +0200
  3406. @@ -74,7 +74,10 @@
  3407. static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
  3408. unsigned int cpu)
  3409. {
  3410. - return per_cpu_ptr(q->queue_ctx, cpu);
  3411. + struct blk_mq_ctx *ctx;
  3412. +
  3413. + ctx = per_cpu_ptr(q->queue_ctx, cpu);
  3414. + return ctx;
  3415. }
  3416. /*
  3417. @@ -85,12 +88,12 @@
  3418. */
  3419. static inline struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q)
  3420. {
  3421. - return __blk_mq_get_ctx(q, get_cpu());
  3422. + return __blk_mq_get_ctx(q, get_cpu_light());
  3423. }
  3424. static inline void blk_mq_put_ctx(struct blk_mq_ctx *ctx)
  3425. {
  3426. - put_cpu();
  3427. + put_cpu_light();
  3428. }
  3429. struct blk_mq_alloc_data {
  3430. diff -Nur linux-4.4.62.orig/block/blk-softirq.c linux-4.4.62/block/blk-softirq.c
  3431. --- linux-4.4.62.orig/block/blk-softirq.c 2017-04-18 07:15:37.000000000 +0200
  3432. +++ linux-4.4.62/block/blk-softirq.c 2017-04-18 17:38:08.038643196 +0200
  3433. @@ -51,6 +51,7 @@
  3434. raise_softirq_irqoff(BLOCK_SOFTIRQ);
  3435. local_irq_restore(flags);
  3436. + preempt_check_resched_rt();
  3437. }
  3438. /*
  3439. @@ -93,6 +94,7 @@
  3440. this_cpu_ptr(&blk_cpu_done));
  3441. raise_softirq_irqoff(BLOCK_SOFTIRQ);
  3442. local_irq_enable();
  3443. + preempt_check_resched_rt();
  3444. }
  3445. return NOTIFY_OK;
  3446. @@ -150,6 +152,7 @@
  3447. goto do_local;
  3448. local_irq_restore(flags);
  3449. + preempt_check_resched_rt();
  3450. }
  3451. /**
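preempt_check_resched_rt() is added after the points where interrupts are re-enabled so that a reschedule requested while they were off (for example by waking the softirq thread) is acted upon promptly on RT. A sketch of the assumed definition; on non-RT kernels it should compile to nothing:

    /* sketch only -- assumed definition, added elsewhere in this patch */
    #ifdef CONFIG_PREEMPT_RT_FULL
    # define preempt_check_resched_rt()   preempt_check_resched()
    #else
    # define preempt_check_resched_rt()   barrier()
    #endif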
  3452. diff -Nur linux-4.4.62.orig/block/bounce.c linux-4.4.62/block/bounce.c
  3453. --- linux-4.4.62.orig/block/bounce.c 2017-04-18 07:15:37.000000000 +0200
  3454. +++ linux-4.4.62/block/bounce.c 2017-04-18 17:38:08.038643196 +0200
  3455. @@ -55,11 +55,11 @@
  3456. unsigned long flags;
  3457. unsigned char *vto;
  3458. - local_irq_save(flags);
  3459. + local_irq_save_nort(flags);
  3460. vto = kmap_atomic(to->bv_page);
  3461. memcpy(vto + to->bv_offset, vfrom, to->bv_len);
  3462. kunmap_atomic(vto);
  3463. - local_irq_restore(flags);
  3464. + local_irq_restore_nort(flags);
  3465. }
  3466. #else /* CONFIG_HIGHMEM */
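The _nort ("not on RT") helpers keep the hard irq-off region on mainline kernels but degrade to a plain flags save on PREEMPT_RT, where the kmap_atomic()/memcpy() sequence between them no longer needs interrupts disabled. A sketch of the assumed definitions:

    /* sketch only -- assumed definitions, added elsewhere in this patch */
    #ifdef CONFIG_PREEMPT_RT_FULL
    # define local_irq_save_nort(flags)     local_save_flags(flags)
    # define local_irq_restore_nort(flags)  (void)(flags)
    #else
    # define local_irq_save_nort(flags)     local_irq_save(flags)
    # define local_irq_restore_nort(flags)  local_irq_restore(flags)
    #endif

The same pair turns up again further down, for example in the libata-sff hunks.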
  3467. diff -Nur linux-4.4.62.orig/crypto/algapi.c linux-4.4.62/crypto/algapi.c
  3468. --- linux-4.4.62.orig/crypto/algapi.c 2017-04-18 07:15:37.000000000 +0200
  3469. +++ linux-4.4.62/crypto/algapi.c 2017-04-18 17:38:08.042643350 +0200
  3470. @@ -720,13 +720,13 @@
  3471. int crypto_register_notifier(struct notifier_block *nb)
  3472. {
  3473. - return blocking_notifier_chain_register(&crypto_chain, nb);
  3474. + return srcu_notifier_chain_register(&crypto_chain, nb);
  3475. }
  3476. EXPORT_SYMBOL_GPL(crypto_register_notifier);
  3477. int crypto_unregister_notifier(struct notifier_block *nb)
  3478. {
  3479. - return blocking_notifier_chain_unregister(&crypto_chain, nb);
  3480. + return srcu_notifier_chain_unregister(&crypto_chain, nb);
  3481. }
  3482. EXPORT_SYMBOL_GPL(crypto_unregister_notifier);
  3483. diff -Nur linux-4.4.62.orig/crypto/api.c linux-4.4.62/crypto/api.c
  3484. --- linux-4.4.62.orig/crypto/api.c 2017-04-18 07:15:37.000000000 +0200
  3485. +++ linux-4.4.62/crypto/api.c 2017-04-18 17:38:08.042643350 +0200
  3486. @@ -31,7 +31,7 @@
  3487. DECLARE_RWSEM(crypto_alg_sem);
  3488. EXPORT_SYMBOL_GPL(crypto_alg_sem);
  3489. -BLOCKING_NOTIFIER_HEAD(crypto_chain);
  3490. +SRCU_NOTIFIER_HEAD(crypto_chain);
  3491. EXPORT_SYMBOL_GPL(crypto_chain);
  3492. static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg);
  3493. @@ -236,10 +236,10 @@
  3494. {
  3495. int ok;
  3496. - ok = blocking_notifier_call_chain(&crypto_chain, val, v);
  3497. + ok = srcu_notifier_call_chain(&crypto_chain, val, v);
  3498. if (ok == NOTIFY_DONE) {
  3499. request_module("cryptomgr");
  3500. - ok = blocking_notifier_call_chain(&crypto_chain, val, v);
  3501. + ok = srcu_notifier_call_chain(&crypto_chain, val, v);
  3502. }
  3503. return ok;
  3504. diff -Nur linux-4.4.62.orig/crypto/internal.h linux-4.4.62/crypto/internal.h
  3505. --- linux-4.4.62.orig/crypto/internal.h 2017-04-18 07:15:37.000000000 +0200
  3506. +++ linux-4.4.62/crypto/internal.h 2017-04-18 17:38:08.042643350 +0200
  3507. @@ -47,7 +47,7 @@
  3508. extern struct list_head crypto_alg_list;
  3509. extern struct rw_semaphore crypto_alg_sem;
  3510. -extern struct blocking_notifier_head crypto_chain;
  3511. +extern struct srcu_notifier_head crypto_chain;
  3512. #ifdef CONFIG_PROC_FS
  3513. void __init crypto_init_proc(void);
  3514. @@ -143,7 +143,7 @@
  3515. static inline void crypto_notify(unsigned long val, void *v)
  3516. {
  3517. - blocking_notifier_call_chain(&crypto_chain, val, v);
  3518. + srcu_notifier_call_chain(&crypto_chain, val, v);
  3519. }
  3520. #endif /* _CRYPTO_INTERNAL_H */
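The crypto notifier chain is switched from a blocking chain (serialized by an rwsem) to an SRCU chain, presumably because the rwsem is awkward in the contexts RT ends up calling it from; with SRCU the chain can be walked without blocking registration. The two APIs are deliberately parallel, so the conversion above is mechanical. A small, hypothetical usage sketch (example_chain is not part of the patch):

    /* hypothetical example -- mirrors the blocking_* calls one for one */
    SRCU_NOTIFIER_HEAD(example_chain);

    static int example_event(struct notifier_block *nb,
                             unsigned long val, void *data)
    {
            return NOTIFY_OK;
    }

    static struct notifier_block example_nb = {
            .notifier_call = example_event,
    };

    /* registration, delivery and removal */
    srcu_notifier_chain_register(&example_chain, &example_nb);
    srcu_notifier_call_chain(&example_chain, 0, NULL);
    srcu_notifier_chain_unregister(&example_chain, &example_nb);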
  3521. diff -Nur linux-4.4.62.orig/Documentation/hwlat_detector.txt linux-4.4.62/Documentation/hwlat_detector.txt
  3522. --- linux-4.4.62.orig/Documentation/hwlat_detector.txt 1970-01-01 01:00:00.000000000 +0100
  3523. +++ linux-4.4.62/Documentation/hwlat_detector.txt 2017-04-18 17:38:07.902637922 +0200
  3524. @@ -0,0 +1,64 @@
  3525. +Introduction:
  3526. +-------------
  3527. +
  3528. +The module hwlat_detector is a special purpose kernel module that is used to
  3529. +detect large system latencies induced by the behavior of certain underlying
  3530. +hardware or firmware, independent of Linux itself. The code was developed
  3531. +originally to detect SMIs (System Management Interrupts) on x86 systems,
  3532. +however there is nothing x86 specific about this patchset. It was
  3533. +originally written for use by the "RT" patch since the Real Time
  3534. +kernel is highly latency sensitive.
  3535. +
  3536. +SMIs are usually not serviced by the Linux kernel, which typically does not
3537. +even know that they are occurring. SMIs are instead set up by BIOS code
  3538. +and are serviced by BIOS code, usually for "critical" events such as
  3539. +management of thermal sensors and fans. Sometimes though, SMIs are used for
  3540. +other tasks and those tasks can spend an inordinate amount of time in the
  3541. +handler (sometimes measured in milliseconds). Obviously this is a problem if
  3542. +you are trying to keep event service latencies down in the microsecond range.
  3543. +
  3544. +The hardware latency detector works by hogging all of the cpus for configurable
  3545. +amounts of time (by calling stop_machine()), polling the CPU Time Stamp Counter
  3546. +for some period, then looking for gaps in the TSC data. Any gap indicates a
  3547. +time when the polling was interrupted and since the machine is stopped and
  3548. +interrupts turned off the only thing that could do that would be an SMI.
  3549. +
  3550. +Note that the SMI detector should *NEVER* be used in a production environment.
  3551. +It is intended to be run manually to determine if the hardware platform has a
  3552. +problem with long system firmware service routines.
  3553. +
  3554. +Usage:
  3555. +------
  3556. +
3557. +Loading the module hwlat_detector with the parameter "enabled=1" (or by
3558. +toggling on the "enable" entry in the "hwlat_detector" debugfs directory) is the
3559. +only step required to start the hwlat_detector. It is possible to redefine the
  3560. +threshold in microseconds (us) above which latency spikes will be taken
  3561. +into account (parameter "threshold=").
  3562. +
  3563. +Example:
  3564. +
  3565. + # modprobe hwlat_detector enabled=1 threshold=100
  3566. +
  3567. +After the module is loaded, it creates a directory named "hwlat_detector" under
  3568. +the debugfs mountpoint, "/debug/hwlat_detector" for this text. It is necessary
  3569. +to have debugfs mounted, which might be on /sys/debug on your system.
  3570. +
  3571. +The /debug/hwlat_detector interface contains the following files:
  3572. +
  3573. +count - number of latency spikes observed since last reset
  3574. +enable - a global enable/disable toggle (0/1), resets count
  3575. +max - maximum hardware latency actually observed (usecs)
  3576. +sample - a pipe from which to read current raw sample data
  3577. + in the format <timestamp> <latency observed usecs>
  3578. + (can be opened O_NONBLOCK for a single sample)
  3579. +threshold - minimum latency value to be considered (usecs)
  3580. +width - time period to sample with CPUs held (usecs)
  3581. + must be less than the total window size (enforced)
  3582. +window - total period of sampling, width being inside (usecs)
  3583. +
  3584. +By default we will set width to 500,000 and window to 1,000,000, meaning that
  3585. +we will sample every 1,000,000 usecs (1s) for 500,000 usecs (0.5s). If we
  3586. +observe any latencies that exceed the threshold (initially 100 usecs),
  3587. +then we write to a global sample ring buffer of 8K samples, which is
  3588. +consumed by reading from the "sample" (pipe) debugfs file interface.
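The detection loop described in the text above can be pictured roughly as follows. This is a hypothetical sketch, not the module's code: read_tsc(), tsc_to_usecs(), record_sample(), sample_width and threshold are stand-in names for whatever the module actually uses.

    /* hypothetical sketch of one sampling window, run with the machine
     * stopped and interrupts off */
    static void one_sample(void)
    {
            u64 start, t1, t2, max_gap = 0;

            start = t2 = read_tsc();
            do {
                    t1 = t2;
                    t2 = read_tsc();
                    if (t2 - t1 > max_gap)
                            max_gap = t2 - t1;      /* largest unexplained gap */
            } while (t2 - start < sample_width);    /* "width" converted to ticks */

            if (tsc_to_usecs(max_gap) >= threshold)
                    record_sample(max_gap);         /* goes into the 8K ring buffer */
    }

Any gap noticeably larger than the loop's own cost must have been spent in firmware (an SMI), since nothing else can run while the machine is stopped with interrupts off.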
  3589. diff -Nur linux-4.4.62.orig/Documentation/kernel-parameters.txt linux-4.4.62/Documentation/kernel-parameters.txt
  3590. --- linux-4.4.62.orig/Documentation/kernel-parameters.txt 2017-04-18 07:15:37.000000000 +0200
  3591. +++ linux-4.4.62/Documentation/kernel-parameters.txt 2017-04-18 17:38:07.902637922 +0200
  3592. @@ -1640,6 +1640,15 @@
  3593. ip= [IP_PNP]
  3594. See Documentation/filesystems/nfs/nfsroot.txt.
  3595. + irqaffinity= [SMP] Set the default irq affinity mask
  3596. + Format:
  3597. + <cpu number>,...,<cpu number>
  3598. + or
  3599. + <cpu number>-<cpu number>
  3600. + (must be a positive range in ascending order)
  3601. + or a mixture
  3602. + <cpu number>,...,<cpu number>-<cpu number>
  3603. +
  3604. irqfixup [HW]
  3605. When an interrupt is not handled search all handlers
  3606. for it. Intended to get systems with badly broken
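A typical use of the irqaffinity= parameter documented above on an RT system is to keep device interrupts off CPUs reserved for real-time work, for example booting with irqaffinity=0-1 so that the default affinity mask of newly set up interrupts only contains CPUs 0 and 1 (an illustrative example, not part of the patch).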
  3607. diff -Nur linux-4.4.62.orig/Documentation/sysrq.txt linux-4.4.62/Documentation/sysrq.txt
  3608. --- linux-4.4.62.orig/Documentation/sysrq.txt 2017-04-18 07:15:37.000000000 +0200
  3609. +++ linux-4.4.62/Documentation/sysrq.txt 2017-04-18 17:38:07.902637922 +0200
  3610. @@ -59,10 +59,17 @@
  3611. On other - If you know of the key combos for other architectures, please
  3612. let me know so I can add them to this section.
  3613. -On all - write a character to /proc/sysrq-trigger. e.g.:
  3614. -
  3615. +On all - write a character to /proc/sysrq-trigger, e.g.:
  3616. echo t > /proc/sysrq-trigger
  3617. +On all - Enable network SysRq by writing a cookie to icmp_echo_sysrq, e.g.
  3618. + echo 0x01020304 >/proc/sys/net/ipv4/icmp_echo_sysrq
  3619. + Send an ICMP echo request with this pattern plus the particular
  3620. + SysRq command key. Example:
  3621. + # ping -c1 -s57 -p0102030468
  3622. + will trigger the SysRq-H (help) command.
  3623. +
  3624. +
  3625. * What are the 'command' keys?
  3626. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  3627. 'b' - Will immediately reboot the system without syncing or unmounting
  3628. diff -Nur linux-4.4.62.orig/Documentation/trace/histograms.txt linux-4.4.62/Documentation/trace/histograms.txt
  3629. --- linux-4.4.62.orig/Documentation/trace/histograms.txt 1970-01-01 01:00:00.000000000 +0100
  3630. +++ linux-4.4.62/Documentation/trace/histograms.txt 2017-04-18 17:38:07.902637922 +0200
  3631. @@ -0,0 +1,186 @@
  3632. + Using the Linux Kernel Latency Histograms
  3633. +
  3634. +
3635. +This document gives a short explanation of how to enable, configure and use
  3636. +latency histograms. Latency histograms are primarily relevant in the
  3637. +context of real-time enabled kernels (CONFIG_PREEMPT/CONFIG_PREEMPT_RT)
  3638. +and are used in the quality management of the Linux real-time
  3639. +capabilities.
  3640. +
  3641. +
  3642. +* Purpose of latency histograms
  3643. +
  3644. +A latency histogram continuously accumulates the frequencies of latency
  3645. +data. There are two types of histograms
  3646. +- potential sources of latencies
  3647. +- effective latencies
  3648. +
  3649. +
  3650. +* Potential sources of latencies
  3651. +
  3652. +Potential sources of latencies are code segments where interrupts,
  3653. +preemption or both are disabled (aka critical sections). To create
  3654. +histograms of potential sources of latency, the kernel stores the time
  3655. +stamp at the start of a critical section, determines the time elapsed
  3656. +when the end of the section is reached, and increments the frequency
  3657. +counter of that latency value - irrespective of whether any concurrently
  3658. +running process is affected by latency or not.
  3659. +- Configuration items (in the Kernel hacking/Tracers submenu)
  3660. + CONFIG_INTERRUPT_OFF_LATENCY
  3661. + CONFIG_PREEMPT_OFF_LATENCY
  3662. +
  3663. +
  3664. +* Effective latencies
  3665. +
3666. +Effective latencies are those that actually occur during wakeup of a process. To
  3667. +determine effective latencies, the kernel stores the time stamp when a
  3668. +process is scheduled to be woken up, and determines the duration of the
  3669. +wakeup time shortly before control is passed over to this process. Note
  3670. +that the apparent latency in user space may be somewhat longer, since the
  3671. +process may be interrupted after control is passed over to it but before
  3672. +the execution in user space takes place. Simply measuring the interval
3673. +between enqueuing and wakeup may also not be appropriate in cases when a
  3674. +process is scheduled as a result of a timer expiration. The timer may have
  3675. +missed its deadline, e.g. due to disabled interrupts, but this latency
  3676. +would not be registered. Therefore, the offsets of missed timers are
  3677. +recorded in a separate histogram. If both wakeup latency and missed timer
  3678. +offsets are configured and enabled, a third histogram may be enabled that
  3679. +records the overall latency as a sum of the timer latency, if any, and the
  3680. +wakeup latency. This histogram is called "timerandwakeup".
  3681. +- Configuration items (in the Kernel hacking/Tracers submenu)
  3682. + CONFIG_WAKEUP_LATENCY
  3683. + CONFIG_MISSED_TIMER_OFSETS
  3684. +
  3685. +
  3686. +* Usage
  3687. +
  3688. +The interface to the administration of the latency histograms is located
  3689. +in the debugfs file system. To mount it, either enter
  3690. +
  3691. +mount -t sysfs nodev /sys
  3692. +mount -t debugfs nodev /sys/kernel/debug
  3693. +
  3694. +from shell command line level, or add
  3695. +
  3696. +nodev /sys sysfs defaults 0 0
  3697. +nodev /sys/kernel/debug debugfs defaults 0 0
  3698. +
  3699. +to the file /etc/fstab. All latency histogram related files are then
  3700. +available in the directory /sys/kernel/debug/tracing/latency_hist. A
  3701. +particular histogram type is enabled by writing non-zero to the related
  3702. +variable in the /sys/kernel/debug/tracing/latency_hist/enable directory.
  3703. +Select "preemptirqsoff" for the histograms of potential sources of
  3704. +latencies and "wakeup" for histograms of effective latencies etc. The
  3705. +histogram data - one per CPU - are available in the files
  3706. +
  3707. +/sys/kernel/debug/tracing/latency_hist/preemptoff/CPUx
  3708. +/sys/kernel/debug/tracing/latency_hist/irqsoff/CPUx
  3709. +/sys/kernel/debug/tracing/latency_hist/preemptirqsoff/CPUx
  3710. +/sys/kernel/debug/tracing/latency_hist/wakeup/CPUx
  3711. +/sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio/CPUx
  3712. +/sys/kernel/debug/tracing/latency_hist/missed_timer_offsets/CPUx
  3713. +/sys/kernel/debug/tracing/latency_hist/timerandwakeup/CPUx
  3714. +
  3715. +The histograms are reset by writing non-zero to the file "reset" in a
  3716. +particular latency directory. To reset all latency data, use
  3717. +
  3718. +#!/bin/sh
  3719. +
  3720. +TRACINGDIR=/sys/kernel/debug/tracing
  3721. +HISTDIR=$TRACINGDIR/latency_hist
  3722. +
  3723. +if test -d $HISTDIR
  3724. +then
  3725. + cd $HISTDIR
  3726. + for i in `find . | grep /reset$`
  3727. + do
  3728. + echo 1 >$i
  3729. + done
  3730. +fi
  3731. +
  3732. +
  3733. +* Data format
  3734. +
  3735. +Latency data are stored with a resolution of one microsecond. The
3736. +maximum latency is 10,240 microseconds. The data are only valid if the
3737. +overflow register is empty. Every output line contains the latency in
3738. +microseconds in the first column and the number of samples in the second
3739. +column. To display only lines with a positive latency count, use, for
  3740. +example,
  3741. +
  3742. +grep -v " 0$" /sys/kernel/debug/tracing/latency_hist/preemptoff/CPU0
  3743. +
  3744. +#Minimum latency: 0 microseconds.
  3745. +#Average latency: 0 microseconds.
  3746. +#Maximum latency: 25 microseconds.
  3747. +#Total samples: 3104770694
  3748. +#There are 0 samples greater or equal than 10240 microseconds
  3749. +#usecs samples
  3750. + 0 2984486876
  3751. + 1 49843506
  3752. + 2 58219047
  3753. + 3 5348126
  3754. + 4 2187960
  3755. + 5 3388262
  3756. + 6 959289
  3757. + 7 208294
  3758. + 8 40420
  3759. + 9 4485
  3760. + 10 14918
  3761. + 11 18340
  3762. + 12 25052
  3763. + 13 19455
  3764. + 14 5602
  3765. + 15 969
  3766. + 16 47
  3767. + 17 18
  3768. + 18 14
  3769. + 19 1
  3770. + 20 3
  3771. + 21 2
  3772. + 22 5
  3773. + 23 2
  3774. + 25 1
  3775. +
  3776. +
  3777. +* Wakeup latency of a selected process
  3778. +
  3779. +To only collect wakeup latency data of a particular process, write the
  3780. +PID of the requested process to
  3781. +
  3782. +/sys/kernel/debug/tracing/latency_hist/wakeup/pid
  3783. +
3784. +PIDs are not considered if this variable is set to 0.
  3785. +
  3786. +
  3787. +* Details of the process with the highest wakeup latency so far
  3788. +
  3789. +Selected data of the process that suffered from the highest wakeup
  3790. +latency that occurred in a particular CPU are available in the file
  3791. +
  3792. +/sys/kernel/debug/tracing/latency_hist/wakeup/max_latency-CPUx.
  3793. +
  3794. +In addition, other relevant system data at the time when the
  3795. +latency occurred are given.
  3796. +
  3797. +The format of the data is (all in one line):
  3798. +<PID> <Priority> <Latency> (<Timeroffset>) <Command> \
  3799. +<- <PID> <Priority> <Command> <Timestamp>
  3800. +
  3801. +The value of <Timeroffset> is only relevant in the combined timer
  3802. +and wakeup latency recording. In the wakeup recording, it is
3803. +always 0; in the missed_timer_offsets recording, it is the same
  3804. +as <Latency>.
  3805. +
3806. +When retrospectively searching for the origin of a latency while
3807. +tracing was not enabled, it may be helpful to know the name and
3808. +some basic data of the task that (finally) switched to the
3809. +late real-time task. In addition to the victim's data, the data
3810. +of the possible culprit are therefore also displayed after the
3811. +"<-" symbol.
  3812. +
  3813. +Finally, the timestamp of the time when the latency occurred
  3814. +in <seconds>.<microseconds> after the most recent system boot
  3815. +is provided.
  3816. +
  3817. +These data are also reset when the wakeup histogram is reset.
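The accounting behind the histograms described above is simple bucketing at 1 us resolution with a 10,240 us cap. A hypothetical sketch of how one sample could be accumulated (the real code is added elsewhere in this patch; the field and function names here are assumptions):

    #define MAX_ENTRY_NUM 10240

    struct hist_data {
            u64     hist_array[MAX_ENTRY_NUM];      /* one bucket per microsecond */
            u64     above_hist_bound_samples;       /* the "overflow register" */
            long    min_lat, max_lat;
            u64     total_samples;
            u64     accumulate_lat;                 /* for the average line */
    };

    static void hist_account(struct hist_data *hd, long lat_us)
    {
            if (lat_us >= MAX_ENTRY_NUM)
                    hd->above_hist_bound_samples++; /* data no longer "valid" */
            else
                    hd->hist_array[lat_us]++;
            if (lat_us < hd->min_lat)
                    hd->min_lat = lat_us;
            if (lat_us > hd->max_lat)
                    hd->max_lat = lat_us;
            hd->total_samples++;
            hd->accumulate_lat += lat_us;
    }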
  3818. diff -Nur linux-4.4.62.orig/drivers/acpi/acpica/acglobal.h linux-4.4.62/drivers/acpi/acpica/acglobal.h
  3819. --- linux-4.4.62.orig/drivers/acpi/acpica/acglobal.h 2017-04-18 07:15:37.000000000 +0200
  3820. +++ linux-4.4.62/drivers/acpi/acpica/acglobal.h 2017-04-18 17:38:08.042643350 +0200
  3821. @@ -116,7 +116,7 @@
  3822. * interrupt level
  3823. */
  3824. ACPI_GLOBAL(acpi_spinlock, acpi_gbl_gpe_lock); /* For GPE data structs and registers */
  3825. -ACPI_GLOBAL(acpi_spinlock, acpi_gbl_hardware_lock); /* For ACPI H/W except GPE registers */
  3826. +ACPI_GLOBAL(acpi_raw_spinlock, acpi_gbl_hardware_lock); /* For ACPI H/W except GPE registers */
  3827. ACPI_GLOBAL(acpi_spinlock, acpi_gbl_reference_count_lock);
  3828. /* Mutex for _OSI support */
  3829. diff -Nur linux-4.4.62.orig/drivers/acpi/acpica/hwregs.c linux-4.4.62/drivers/acpi/acpica/hwregs.c
  3830. --- linux-4.4.62.orig/drivers/acpi/acpica/hwregs.c 2017-04-18 07:15:37.000000000 +0200
  3831. +++ linux-4.4.62/drivers/acpi/acpica/hwregs.c 2017-04-18 17:38:08.042643350 +0200
  3832. @@ -269,14 +269,14 @@
  3833. ACPI_BITMASK_ALL_FIXED_STATUS,
  3834. ACPI_FORMAT_UINT64(acpi_gbl_xpm1a_status.address)));
  3835. - lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock);
  3836. + raw_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags);
  3837. /* Clear the fixed events in PM1 A/B */
  3838. status = acpi_hw_register_write(ACPI_REGISTER_PM1_STATUS,
  3839. ACPI_BITMASK_ALL_FIXED_STATUS);
  3840. - acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags);
  3841. + raw_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags);
  3842. if (ACPI_FAILURE(status)) {
  3843. goto exit;
  3844. diff -Nur linux-4.4.62.orig/drivers/acpi/acpica/hwxface.c linux-4.4.62/drivers/acpi/acpica/hwxface.c
  3845. --- linux-4.4.62.orig/drivers/acpi/acpica/hwxface.c 2017-04-18 07:15:37.000000000 +0200
  3846. +++ linux-4.4.62/drivers/acpi/acpica/hwxface.c 2017-04-18 17:38:08.042643350 +0200
  3847. @@ -374,7 +374,7 @@
  3848. return_ACPI_STATUS(AE_BAD_PARAMETER);
  3849. }
  3850. - lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock);
  3851. + raw_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags);
  3852. /*
  3853. * At this point, we know that the parent register is one of the
  3854. @@ -435,7 +435,7 @@
  3855. unlock_and_exit:
  3856. - acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags);
  3857. + raw_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags);
  3858. return_ACPI_STATUS(status);
  3859. }
  3860. diff -Nur linux-4.4.62.orig/drivers/acpi/acpica/utmutex.c linux-4.4.62/drivers/acpi/acpica/utmutex.c
  3861. --- linux-4.4.62.orig/drivers/acpi/acpica/utmutex.c 2017-04-18 07:15:37.000000000 +0200
  3862. +++ linux-4.4.62/drivers/acpi/acpica/utmutex.c 2017-04-18 17:38:08.042643350 +0200
  3863. @@ -88,7 +88,7 @@
  3864. return_ACPI_STATUS (status);
  3865. }
  3866. - status = acpi_os_create_lock (&acpi_gbl_hardware_lock);
  3867. + status = acpi_os_create_raw_lock (&acpi_gbl_hardware_lock);
  3868. if (ACPI_FAILURE (status)) {
  3869. return_ACPI_STATUS (status);
  3870. }
  3871. @@ -156,7 +156,7 @@
  3872. /* Delete the spinlocks */
  3873. acpi_os_delete_lock(acpi_gbl_gpe_lock);
  3874. - acpi_os_delete_lock(acpi_gbl_hardware_lock);
  3875. + acpi_os_delete_raw_lock(acpi_gbl_hardware_lock);
  3876. acpi_os_delete_lock(acpi_gbl_reference_count_lock);
  3877. /* Delete the reader/writer lock */
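acpi_os_create_raw_lock()/acpi_os_delete_raw_lock() are the raw-spinlock counterparts of the existing acpi_os_create_lock()/acpi_os_delete_lock() helpers and must be provided elsewhere in this patch for the conversion above to build. A rough sketch of what they presumably look like (an assumption, not the patch's code):

    /* assumed helpers; acpi_raw_spinlock would wrap a raw_spinlock_t * */
    static inline acpi_status
    acpi_os_create_raw_lock(acpi_raw_spinlock *out_handle)
    {
            raw_spinlock_t *lock;

            lock = ACPI_ALLOCATE(sizeof(*lock));
            if (!lock)
                    return AE_NO_MEMORY;
            raw_spin_lock_init(lock);
            *out_handle = lock;
            return AE_OK;
    }

    static inline void acpi_os_delete_raw_lock(acpi_raw_spinlock handle)
    {
            ACPI_FREE(handle);
    }

The point of the conversion is that acpi_gbl_hardware_lock is taken in contexts that cannot sleep on RT (with interrupts disabled), so it has to stay a genuinely spinning lock rather than become a sleeping spinlock.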
  3878. diff -Nur linux-4.4.62.orig/drivers/ata/libata-sff.c linux-4.4.62/drivers/ata/libata-sff.c
  3879. --- linux-4.4.62.orig/drivers/ata/libata-sff.c 2017-04-18 07:15:37.000000000 +0200
  3880. +++ linux-4.4.62/drivers/ata/libata-sff.c 2017-04-18 17:38:08.042643350 +0200
  3881. @@ -678,9 +678,9 @@
  3882. unsigned long flags;
  3883. unsigned int consumed;
  3884. - local_irq_save(flags);
  3885. + local_irq_save_nort(flags);
  3886. consumed = ata_sff_data_xfer32(dev, buf, buflen, rw);
  3887. - local_irq_restore(flags);
  3888. + local_irq_restore_nort(flags);
  3889. return consumed;
  3890. }
  3891. @@ -719,7 +719,7 @@
  3892. unsigned long flags;
  3893. /* FIXME: use a bounce buffer */
  3894. - local_irq_save(flags);
  3895. + local_irq_save_nort(flags);
  3896. buf = kmap_atomic(page);
  3897. /* do the actual data transfer */
  3898. @@ -727,7 +727,7 @@
  3899. do_write);
  3900. kunmap_atomic(buf);
  3901. - local_irq_restore(flags);
  3902. + local_irq_restore_nort(flags);
  3903. } else {
  3904. buf = page_address(page);
  3905. ap->ops->sff_data_xfer(qc->dev, buf + offset, qc->sect_size,
  3906. @@ -864,7 +864,7 @@
  3907. unsigned long flags;
  3908. /* FIXME: use bounce buffer */
  3909. - local_irq_save(flags);
  3910. + local_irq_save_nort(flags);
  3911. buf = kmap_atomic(page);
  3912. /* do the actual data transfer */
  3913. @@ -872,7 +872,7 @@
  3914. count, rw);
  3915. kunmap_atomic(buf);
  3916. - local_irq_restore(flags);
  3917. + local_irq_restore_nort(flags);
  3918. } else {
  3919. buf = page_address(page);
  3920. consumed = ap->ops->sff_data_xfer(dev, buf + offset,
  3921. diff -Nur linux-4.4.62.orig/drivers/block/zram/zram_drv.c linux-4.4.62/drivers/block/zram/zram_drv.c
  3922. --- linux-4.4.62.orig/drivers/block/zram/zram_drv.c 2017-04-18 07:15:37.000000000 +0200
  3923. +++ linux-4.4.62/drivers/block/zram/zram_drv.c 2017-04-18 17:38:08.042643350 +0200
  3924. @@ -520,6 +520,8 @@
  3925. goto out_error;
  3926. }
  3927. + zram_meta_init_table_locks(meta, disksize);
  3928. +
  3929. return meta;
  3930. out_error:
  3931. @@ -568,12 +570,12 @@
  3932. unsigned long handle;
  3933. size_t size;
  3934. - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
  3935. + zram_lock_table(&meta->table[index]);
  3936. handle = meta->table[index].handle;
  3937. size = zram_get_obj_size(meta, index);
  3938. if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
  3939. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  3940. + zram_unlock_table(&meta->table[index]);
  3941. clear_page(mem);
  3942. return 0;
  3943. }
  3944. @@ -584,7 +586,7 @@
  3945. else
  3946. ret = zcomp_decompress(zram->comp, cmem, size, mem);
  3947. zs_unmap_object(meta->mem_pool, handle);
  3948. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  3949. + zram_unlock_table(&meta->table[index]);
  3950. /* Should NEVER happen. Return bio error if it does. */
  3951. if (unlikely(ret)) {
  3952. @@ -604,14 +606,14 @@
  3953. struct zram_meta *meta = zram->meta;
  3954. page = bvec->bv_page;
  3955. - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
  3956. + zram_lock_table(&meta->table[index]);
  3957. if (unlikely(!meta->table[index].handle) ||
  3958. zram_test_flag(meta, index, ZRAM_ZERO)) {
  3959. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  3960. + zram_unlock_table(&meta->table[index]);
  3961. handle_zero_page(bvec);
  3962. return 0;
  3963. }
  3964. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  3965. + zram_unlock_table(&meta->table[index]);
  3966. if (is_partial_io(bvec))
  3967. /* Use a temporary buffer to decompress the page */
  3968. @@ -689,10 +691,10 @@
  3969. if (user_mem)
  3970. kunmap_atomic(user_mem);
  3971. /* Free memory associated with this sector now. */
  3972. - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
  3973. + zram_lock_table(&meta->table[index]);
  3974. zram_free_page(zram, index);
  3975. zram_set_flag(meta, index, ZRAM_ZERO);
  3976. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  3977. + zram_unlock_table(&meta->table[index]);
  3978. atomic64_inc(&zram->stats.zero_pages);
  3979. ret = 0;
  3980. @@ -752,12 +754,12 @@
  3981. * Free memory associated with this sector
  3982. * before overwriting unused sectors.
  3983. */
  3984. - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
  3985. + zram_lock_table(&meta->table[index]);
  3986. zram_free_page(zram, index);
  3987. meta->table[index].handle = handle;
  3988. zram_set_obj_size(meta, index, clen);
  3989. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  3990. + zram_unlock_table(&meta->table[index]);
  3991. /* Update stats */
  3992. atomic64_add(clen, &zram->stats.compr_data_size);
  3993. @@ -800,9 +802,9 @@
  3994. }
  3995. while (n >= PAGE_SIZE) {
  3996. - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
  3997. + zram_lock_table(&meta->table[index]);
  3998. zram_free_page(zram, index);
  3999. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  4000. + zram_unlock_table(&meta->table[index]);
  4001. atomic64_inc(&zram->stats.notify_free);
  4002. index++;
  4003. n -= PAGE_SIZE;
  4004. @@ -928,9 +930,9 @@
  4005. zram = bdev->bd_disk->private_data;
  4006. meta = zram->meta;
  4007. - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
  4008. + zram_lock_table(&meta->table[index]);
  4009. zram_free_page(zram, index);
  4010. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  4011. + zram_unlock_table(&meta->table[index]);
  4012. atomic64_inc(&zram->stats.notify_free);
  4013. }
  4014. diff -Nur linux-4.4.62.orig/drivers/block/zram/zram_drv.h linux-4.4.62/drivers/block/zram/zram_drv.h
  4015. --- linux-4.4.62.orig/drivers/block/zram/zram_drv.h 2017-04-18 07:15:37.000000000 +0200
  4016. +++ linux-4.4.62/drivers/block/zram/zram_drv.h 2017-04-18 17:38:08.042643350 +0200
  4017. @@ -72,6 +72,9 @@
  4018. struct zram_table_entry {
  4019. unsigned long handle;
  4020. unsigned long value;
  4021. +#ifdef CONFIG_PREEMPT_RT_BASE
  4022. + spinlock_t lock;
  4023. +#endif
  4024. };
  4025. struct zram_stats {
  4026. @@ -119,4 +122,42 @@
  4027. */
  4028. bool claim; /* Protected by bdev->bd_mutex */
  4029. };
  4030. +
  4031. +#ifndef CONFIG_PREEMPT_RT_BASE
  4032. +static inline void zram_lock_table(struct zram_table_entry *table)
  4033. +{
  4034. + bit_spin_lock(ZRAM_ACCESS, &table->value);
  4035. +}
  4036. +
  4037. +static inline void zram_unlock_table(struct zram_table_entry *table)
  4038. +{
  4039. + bit_spin_unlock(ZRAM_ACCESS, &table->value);
  4040. +}
  4041. +
  4042. +static inline void zram_meta_init_table_locks(struct zram_meta *meta, u64 disksize) { }
  4043. +#else /* CONFIG_PREEMPT_RT_BASE */
  4044. +static inline void zram_lock_table(struct zram_table_entry *table)
  4045. +{
  4046. + spin_lock(&table->lock);
  4047. + __set_bit(ZRAM_ACCESS, &table->value);
  4048. +}
  4049. +
  4050. +static inline void zram_unlock_table(struct zram_table_entry *table)
  4051. +{
  4052. + __clear_bit(ZRAM_ACCESS, &table->value);
  4053. + spin_unlock(&table->lock);
  4054. +}
  4055. +
  4056. +static inline void zram_meta_init_table_locks(struct zram_meta *meta, u64 disksize)
  4057. +{
  4058. + size_t num_pages = disksize >> PAGE_SHIFT;
  4059. + size_t index;
  4060. +
  4061. + for (index = 0; index < num_pages; index++) {
  4062. + spinlock_t *lock = &meta->table[index].lock;
  4063. + spin_lock_init(lock);
  4064. + }
  4065. +}
  4066. +#endif /* CONFIG_PREEMPT_RT_BASE */
  4067. +
  4068. #endif
  4069. diff -Nur linux-4.4.62.orig/drivers/char/random.c linux-4.4.62/drivers/char/random.c
  4070. --- linux-4.4.62.orig/drivers/char/random.c 2017-04-18 07:15:37.000000000 +0200
  4071. +++ linux-4.4.62/drivers/char/random.c 2017-04-18 17:38:08.042643350 +0200
  4072. @@ -799,8 +799,6 @@
  4073. } sample;
  4074. long delta, delta2, delta3;
  4075. - preempt_disable();
  4076. -
  4077. sample.jiffies = jiffies;
  4078. sample.cycles = random_get_entropy();
  4079. sample.num = num;
  4080. @@ -841,7 +839,6 @@
  4081. */
  4082. credit_entropy_bits(r, min_t(int, fls(delta>>1), 11));
  4083. }
  4084. - preempt_enable();
  4085. }
  4086. void add_input_randomness(unsigned int type, unsigned int code,
  4087. @@ -894,28 +891,27 @@
  4088. return *(ptr + f->reg_idx++);
  4089. }
  4090. -void add_interrupt_randomness(int irq, int irq_flags)
  4091. +void add_interrupt_randomness(int irq, int irq_flags, __u64 ip)
  4092. {
  4093. struct entropy_store *r;
  4094. struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness);
  4095. - struct pt_regs *regs = get_irq_regs();
  4096. unsigned long now = jiffies;
  4097. cycles_t cycles = random_get_entropy();
  4098. __u32 c_high, j_high;
  4099. - __u64 ip;
  4100. unsigned long seed;
  4101. int credit = 0;
  4102. if (cycles == 0)
  4103. - cycles = get_reg(fast_pool, regs);
  4104. + cycles = get_reg(fast_pool, NULL);
  4105. c_high = (sizeof(cycles) > 4) ? cycles >> 32 : 0;
  4106. j_high = (sizeof(now) > 4) ? now >> 32 : 0;
  4107. fast_pool->pool[0] ^= cycles ^ j_high ^ irq;
  4108. fast_pool->pool[1] ^= now ^ c_high;
  4109. - ip = regs ? instruction_pointer(regs) : _RET_IP_;
  4110. + if (!ip)
  4111. + ip = _RET_IP_;
  4112. fast_pool->pool[2] ^= ip;
  4113. fast_pool->pool[3] ^= (sizeof(ip) > 4) ? ip >> 32 :
  4114. - get_reg(fast_pool, regs);
  4115. + get_reg(fast_pool, NULL);
  4116. fast_mix(fast_pool);
  4117. add_interrupt_bench(cycles);
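With the prototype change above, the interrupt entry code now supplies the instruction pointer itself instead of add_interrupt_randomness() digging it out of get_irq_regs(), and the preempt_disable()/preempt_enable() pair can go because the RT side defers the entropy accounting out of the hard interrupt path. A hypothetical caller-side sketch (desc->random_ip and the exact #ifdef placement are assumptions about the rest of this patch, not code shown here):

    /* in the generic IRQ handling path -- sketch only */
    u64 ip = regs ? instruction_pointer(regs) : 0;

    #ifdef CONFIG_PREEMPT_RT_FULL
            desc->random_ip = ip;           /* entropy added later, from the irq thread */
    #else
            add_interrupt_randomness(irq, flags, ip);
    #endif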
  4118. diff -Nur linux-4.4.62.orig/drivers/clk/at91/clk-generated.c linux-4.4.62/drivers/clk/at91/clk-generated.c
  4119. --- linux-4.4.62.orig/drivers/clk/at91/clk-generated.c 2017-04-18 07:15:37.000000000 +0200
  4120. +++ linux-4.4.62/drivers/clk/at91/clk-generated.c 2017-04-18 17:38:08.042643350 +0200
  4121. @@ -15,8 +15,8 @@
  4122. #include <linux/clkdev.h>
  4123. #include <linux/clk/at91_pmc.h>
  4124. #include <linux/of.h>
  4125. -#include <linux/of_address.h>
  4126. -#include <linux/io.h>
  4127. +#include <linux/mfd/syscon.h>
  4128. +#include <linux/regmap.h>
  4129. #include "pmc.h"
  4130. @@ -28,8 +28,9 @@
  4131. struct clk_generated {
  4132. struct clk_hw hw;
  4133. - struct at91_pmc *pmc;
  4134. + struct regmap *regmap;
  4135. struct clk_range range;
  4136. + spinlock_t *lock;
  4137. u32 id;
  4138. u32 gckdiv;
  4139. u8 parent_id;
  4140. @@ -41,49 +42,52 @@
  4141. static int clk_generated_enable(struct clk_hw *hw)
  4142. {
  4143. struct clk_generated *gck = to_clk_generated(hw);
  4144. - struct at91_pmc *pmc = gck->pmc;
  4145. - u32 tmp;
  4146. + unsigned long flags;
  4147. pr_debug("GCLK: %s, gckdiv = %d, parent id = %d\n",
  4148. __func__, gck->gckdiv, gck->parent_id);
  4149. - pmc_lock(pmc);
  4150. - pmc_write(pmc, AT91_PMC_PCR, (gck->id & AT91_PMC_PCR_PID_MASK));
  4151. - tmp = pmc_read(pmc, AT91_PMC_PCR) &
  4152. - ~(AT91_PMC_PCR_GCKDIV_MASK | AT91_PMC_PCR_GCKCSS_MASK);
  4153. - pmc_write(pmc, AT91_PMC_PCR, tmp | AT91_PMC_PCR_GCKCSS(gck->parent_id)
  4154. - | AT91_PMC_PCR_CMD
  4155. - | AT91_PMC_PCR_GCKDIV(gck->gckdiv)
  4156. - | AT91_PMC_PCR_GCKEN);
  4157. - pmc_unlock(pmc);
  4158. + spin_lock_irqsave(gck->lock, flags);
  4159. + regmap_write(gck->regmap, AT91_PMC_PCR,
  4160. + (gck->id & AT91_PMC_PCR_PID_MASK));
  4161. + regmap_update_bits(gck->regmap, AT91_PMC_PCR,
  4162. + AT91_PMC_PCR_GCKDIV_MASK | AT91_PMC_PCR_GCKCSS_MASK |
  4163. + AT91_PMC_PCR_CMD | AT91_PMC_PCR_GCKEN,
  4164. + AT91_PMC_PCR_GCKCSS(gck->parent_id) |
  4165. + AT91_PMC_PCR_CMD |
  4166. + AT91_PMC_PCR_GCKDIV(gck->gckdiv) |
  4167. + AT91_PMC_PCR_GCKEN);
  4168. + spin_unlock_irqrestore(gck->lock, flags);
  4169. return 0;
  4170. }
  4171. static void clk_generated_disable(struct clk_hw *hw)
  4172. {
  4173. struct clk_generated *gck = to_clk_generated(hw);
  4174. - struct at91_pmc *pmc = gck->pmc;
  4175. - u32 tmp;
  4176. + unsigned long flags;
  4177. - pmc_lock(pmc);
  4178. - pmc_write(pmc, AT91_PMC_PCR, (gck->id & AT91_PMC_PCR_PID_MASK));
  4179. - tmp = pmc_read(pmc, AT91_PMC_PCR) & ~AT91_PMC_PCR_GCKEN;
  4180. - pmc_write(pmc, AT91_PMC_PCR, tmp | AT91_PMC_PCR_CMD);
  4181. - pmc_unlock(pmc);
  4182. + spin_lock_irqsave(gck->lock, flags);
  4183. + regmap_write(gck->regmap, AT91_PMC_PCR,
  4184. + (gck->id & AT91_PMC_PCR_PID_MASK));
  4185. + regmap_update_bits(gck->regmap, AT91_PMC_PCR,
  4186. + AT91_PMC_PCR_CMD | AT91_PMC_PCR_GCKEN,
  4187. + AT91_PMC_PCR_CMD);
  4188. + spin_unlock_irqrestore(gck->lock, flags);
  4189. }
  4190. static int clk_generated_is_enabled(struct clk_hw *hw)
  4191. {
  4192. struct clk_generated *gck = to_clk_generated(hw);
  4193. - struct at91_pmc *pmc = gck->pmc;
  4194. - int ret;
  4195. + unsigned long flags;
  4196. + unsigned int status;
  4197. - pmc_lock(pmc);
  4198. - pmc_write(pmc, AT91_PMC_PCR, (gck->id & AT91_PMC_PCR_PID_MASK));
  4199. - ret = !!(pmc_read(pmc, AT91_PMC_PCR) & AT91_PMC_PCR_GCKEN);
  4200. - pmc_unlock(pmc);
  4201. + spin_lock_irqsave(gck->lock, flags);
  4202. + regmap_write(gck->regmap, AT91_PMC_PCR,
  4203. + (gck->id & AT91_PMC_PCR_PID_MASK));
  4204. + regmap_read(gck->regmap, AT91_PMC_PCR, &status);
  4205. + spin_unlock_irqrestore(gck->lock, flags);
  4206. - return ret;
  4207. + return status & AT91_PMC_PCR_GCKEN ? 1 : 0;
  4208. }
  4209. static unsigned long
  4210. @@ -214,13 +218,14 @@
  4211. */
  4212. static void clk_generated_startup(struct clk_generated *gck)
  4213. {
  4214. - struct at91_pmc *pmc = gck->pmc;
  4215. u32 tmp;
  4216. + unsigned long flags;
  4217. - pmc_lock(pmc);
  4218. - pmc_write(pmc, AT91_PMC_PCR, (gck->id & AT91_PMC_PCR_PID_MASK));
  4219. - tmp = pmc_read(pmc, AT91_PMC_PCR);
  4220. - pmc_unlock(pmc);
  4221. + spin_lock_irqsave(gck->lock, flags);
  4222. + regmap_write(gck->regmap, AT91_PMC_PCR,
  4223. + (gck->id & AT91_PMC_PCR_PID_MASK));
  4224. + regmap_read(gck->regmap, AT91_PMC_PCR, &tmp);
  4225. + spin_unlock_irqrestore(gck->lock, flags);
  4226. gck->parent_id = (tmp & AT91_PMC_PCR_GCKCSS_MASK)
  4227. >> AT91_PMC_PCR_GCKCSS_OFFSET;
  4228. @@ -229,8 +234,8 @@
  4229. }
  4230. static struct clk * __init
  4231. -at91_clk_register_generated(struct at91_pmc *pmc, const char *name,
  4232. - const char **parent_names, u8 num_parents,
  4233. +at91_clk_register_generated(struct regmap *regmap, spinlock_t *lock, const char
  4234. + *name, const char **parent_names, u8 num_parents,
  4235. u8 id, const struct clk_range *range)
  4236. {
  4237. struct clk_generated *gck;
  4238. @@ -249,7 +254,8 @@
  4239. gck->id = id;
  4240. gck->hw.init = &init;
  4241. - gck->pmc = pmc;
  4242. + gck->regmap = regmap;
  4243. + gck->lock = lock;
  4244. gck->range = *range;
  4245. clk = clk_register(NULL, &gck->hw);
  4246. @@ -261,8 +267,7 @@
  4247. return clk;
  4248. }
  4249. -void __init of_sama5d2_clk_generated_setup(struct device_node *np,
  4250. - struct at91_pmc *pmc)
  4251. +void __init of_sama5d2_clk_generated_setup(struct device_node *np)
  4252. {
  4253. int num;
  4254. u32 id;
  4255. @@ -272,6 +277,7 @@
  4256. const char *parent_names[GENERATED_SOURCE_MAX];
  4257. struct device_node *gcknp;
  4258. struct clk_range range = CLK_RANGE(0, 0);
  4259. + struct regmap *regmap;
  4260. num_parents = of_clk_get_parent_count(np);
  4261. if (num_parents <= 0 || num_parents > GENERATED_SOURCE_MAX)
  4262. @@ -283,6 +289,10 @@
  4263. if (!num || num > PERIPHERAL_MAX)
  4264. return;
  4265. + regmap = syscon_node_to_regmap(of_get_parent(np));
  4266. + if (IS_ERR(regmap))
  4267. + return;
  4268. +
  4269. for_each_child_of_node(np, gcknp) {
  4270. if (of_property_read_u32(gcknp, "reg", &id))
  4271. continue;
  4272. @@ -296,11 +306,14 @@
  4273. of_at91_get_clk_range(gcknp, "atmel,clk-output-range",
  4274. &range);
  4275. - clk = at91_clk_register_generated(pmc, name, parent_names,
  4276. - num_parents, id, &range);
  4277. + clk = at91_clk_register_generated(regmap, &pmc_pcr_lock, name,
  4278. + parent_names, num_parents,
  4279. + id, &range);
  4280. if (IS_ERR(clk))
  4281. continue;
  4282. of_clk_add_provider(gcknp, of_clk_src_simple_get, clk);
  4283. }
  4284. }
  4285. +CLK_OF_DECLARE(of_sama5d2_clk_generated_setup, "atmel,sama5d2-clk-generated",
  4286. + of_sama5d2_clk_generated_setup);
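For readers not familiar with the regmap API used throughout these at91 clock conversions: regmap_update_bits(map, reg, mask, val) is a locked read-modify-write, which is why the open-coded pmc_read()/pmc_write() sequences above collapse into single calls. Conceptually it behaves like the following (a simplification; the real helper also goes through the regmap's own locking and register cache):

    /* conceptual equivalent of regmap_update_bits(map, reg, mask, val) */
    unsigned int tmp;

    regmap_read(map, reg, &tmp);
    tmp = (tmp & ~mask) | (val & mask);
    regmap_write(map, reg, tmp);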
  4287. diff -Nur linux-4.4.62.orig/drivers/clk/at91/clk-h32mx.c linux-4.4.62/drivers/clk/at91/clk-h32mx.c
  4288. --- linux-4.4.62.orig/drivers/clk/at91/clk-h32mx.c 2017-04-18 07:15:37.000000000 +0200
  4289. +++ linux-4.4.62/drivers/clk/at91/clk-h32mx.c 2017-04-18 17:38:08.042643350 +0200
  4290. @@ -15,15 +15,9 @@
  4291. #include <linux/clk-provider.h>
  4292. #include <linux/clkdev.h>
  4293. #include <linux/clk/at91_pmc.h>
  4294. -#include <linux/delay.h>
  4295. #include <linux/of.h>
  4296. -#include <linux/of_address.h>
  4297. -#include <linux/of_irq.h>
  4298. -#include <linux/io.h>
  4299. -#include <linux/interrupt.h>
  4300. -#include <linux/irq.h>
  4301. -#include <linux/sched.h>
  4302. -#include <linux/wait.h>
  4303. +#include <linux/regmap.h>
  4304. +#include <linux/mfd/syscon.h>
  4305. #include "pmc.h"
  4306. @@ -31,7 +25,7 @@
  4307. struct clk_sama5d4_h32mx {
  4308. struct clk_hw hw;
  4309. - struct at91_pmc *pmc;
  4310. + struct regmap *regmap;
  4311. };
  4312. #define to_clk_sama5d4_h32mx(hw) container_of(hw, struct clk_sama5d4_h32mx, hw)
  4313. @@ -40,8 +34,10 @@
  4314. unsigned long parent_rate)
  4315. {
  4316. struct clk_sama5d4_h32mx *h32mxclk = to_clk_sama5d4_h32mx(hw);
  4317. + unsigned int mckr;
  4318. - if (pmc_read(h32mxclk->pmc, AT91_PMC_MCKR) & AT91_PMC_H32MXDIV)
  4319. + regmap_read(h32mxclk->regmap, AT91_PMC_MCKR, &mckr);
  4320. + if (mckr & AT91_PMC_H32MXDIV)
  4321. return parent_rate / 2;
  4322. if (parent_rate > H32MX_MAX_FREQ)
  4323. @@ -70,18 +66,16 @@
  4324. unsigned long parent_rate)
  4325. {
  4326. struct clk_sama5d4_h32mx *h32mxclk = to_clk_sama5d4_h32mx(hw);
  4327. - struct at91_pmc *pmc = h32mxclk->pmc;
  4328. - u32 tmp;
  4329. + u32 mckr = 0;
  4330. if (parent_rate != rate && (parent_rate / 2) != rate)
  4331. return -EINVAL;
  4332. - pmc_lock(pmc);
  4333. - tmp = pmc_read(pmc, AT91_PMC_MCKR) & ~AT91_PMC_H32MXDIV;
  4334. if ((parent_rate / 2) == rate)
  4335. - tmp |= AT91_PMC_H32MXDIV;
  4336. - pmc_write(pmc, AT91_PMC_MCKR, tmp);
  4337. - pmc_unlock(pmc);
  4338. + mckr = AT91_PMC_H32MXDIV;
  4339. +
  4340. + regmap_update_bits(h32mxclk->regmap, AT91_PMC_MCKR,
  4341. + AT91_PMC_H32MXDIV, mckr);
  4342. return 0;
  4343. }
  4344. @@ -92,14 +86,18 @@
  4345. .set_rate = clk_sama5d4_h32mx_set_rate,
  4346. };
  4347. -void __init of_sama5d4_clk_h32mx_setup(struct device_node *np,
  4348. - struct at91_pmc *pmc)
  4349. +static void __init of_sama5d4_clk_h32mx_setup(struct device_node *np)
  4350. {
  4351. struct clk_sama5d4_h32mx *h32mxclk;
  4352. struct clk_init_data init;
  4353. const char *parent_name;
  4354. + struct regmap *regmap;
  4355. struct clk *clk;
  4356. + regmap = syscon_node_to_regmap(of_get_parent(np));
  4357. + if (IS_ERR(regmap))
  4358. + return;
  4359. +
  4360. h32mxclk = kzalloc(sizeof(*h32mxclk), GFP_KERNEL);
  4361. if (!h32mxclk)
  4362. return;
  4363. @@ -113,7 +111,7 @@
  4364. init.flags = CLK_SET_RATE_GATE;
  4365. h32mxclk->hw.init = &init;
  4366. - h32mxclk->pmc = pmc;
  4367. + h32mxclk->regmap = regmap;
  4368. clk = clk_register(NULL, &h32mxclk->hw);
  4369. if (IS_ERR(clk)) {
  4370. @@ -123,3 +121,5 @@
  4371. of_clk_add_provider(np, of_clk_src_simple_get, clk);
  4372. }
  4373. +CLK_OF_DECLARE(of_sama5d4_clk_h32mx_setup, "atmel,sama5d4-clk-h32mx",
  4374. + of_sama5d4_clk_h32mx_setup);
  4375. diff -Nur linux-4.4.62.orig/drivers/clk/at91/clk-main.c linux-4.4.62/drivers/clk/at91/clk-main.c
  4376. --- linux-4.4.62.orig/drivers/clk/at91/clk-main.c 2017-04-18 07:15:37.000000000 +0200
  4377. +++ linux-4.4.62/drivers/clk/at91/clk-main.c 2017-04-18 17:38:08.042643350 +0200
  4378. @@ -13,13 +13,8 @@
  4379. #include <linux/clk/at91_pmc.h>
  4380. #include <linux/delay.h>
  4381. #include <linux/of.h>
  4382. -#include <linux/of_address.h>
  4383. -#include <linux/of_irq.h>
  4384. -#include <linux/io.h>
  4385. -#include <linux/interrupt.h>
  4386. -#include <linux/irq.h>
  4387. -#include <linux/sched.h>
  4388. -#include <linux/wait.h>
  4389. +#include <linux/mfd/syscon.h>
  4390. +#include <linux/regmap.h>
  4391. #include "pmc.h"
  4392. @@ -34,18 +29,14 @@
  4393. struct clk_main_osc {
  4394. struct clk_hw hw;
  4395. - struct at91_pmc *pmc;
  4396. - unsigned int irq;
  4397. - wait_queue_head_t wait;
  4398. + struct regmap *regmap;
  4399. };
  4400. #define to_clk_main_osc(hw) container_of(hw, struct clk_main_osc, hw)
  4401. struct clk_main_rc_osc {
  4402. struct clk_hw hw;
  4403. - struct at91_pmc *pmc;
  4404. - unsigned int irq;
  4405. - wait_queue_head_t wait;
  4406. + struct regmap *regmap;
  4407. unsigned long frequency;
  4408. unsigned long accuracy;
  4409. };
  4410. @@ -54,51 +45,47 @@
  4411. struct clk_rm9200_main {
  4412. struct clk_hw hw;
  4413. - struct at91_pmc *pmc;
  4414. + struct regmap *regmap;
  4415. };
  4416. #define to_clk_rm9200_main(hw) container_of(hw, struct clk_rm9200_main, hw)
  4417. struct clk_sam9x5_main {
  4418. struct clk_hw hw;
  4419. - struct at91_pmc *pmc;
  4420. - unsigned int irq;
  4421. - wait_queue_head_t wait;
  4422. + struct regmap *regmap;
  4423. u8 parent;
  4424. };
  4425. #define to_clk_sam9x5_main(hw) container_of(hw, struct clk_sam9x5_main, hw)
  4426. -static irqreturn_t clk_main_osc_irq_handler(int irq, void *dev_id)
  4427. +static inline bool clk_main_osc_ready(struct regmap *regmap)
  4428. {
  4429. - struct clk_main_osc *osc = dev_id;
  4430. + unsigned int status;
  4431. - wake_up(&osc->wait);
  4432. - disable_irq_nosync(osc->irq);
  4433. + regmap_read(regmap, AT91_PMC_SR, &status);
  4434. - return IRQ_HANDLED;
  4435. + return status & AT91_PMC_MOSCS;
  4436. }
  4437. static int clk_main_osc_prepare(struct clk_hw *hw)
  4438. {
  4439. struct clk_main_osc *osc = to_clk_main_osc(hw);
  4440. - struct at91_pmc *pmc = osc->pmc;
  4441. + struct regmap *regmap = osc->regmap;
  4442. u32 tmp;
  4443. - tmp = pmc_read(pmc, AT91_CKGR_MOR) & ~MOR_KEY_MASK;
  4444. + regmap_read(regmap, AT91_CKGR_MOR, &tmp);
  4445. + tmp &= ~MOR_KEY_MASK;
  4446. +
  4447. if (tmp & AT91_PMC_OSCBYPASS)
  4448. return 0;
  4449. if (!(tmp & AT91_PMC_MOSCEN)) {
  4450. tmp |= AT91_PMC_MOSCEN | AT91_PMC_KEY;
  4451. - pmc_write(pmc, AT91_CKGR_MOR, tmp);
  4452. + regmap_write(regmap, AT91_CKGR_MOR, tmp);
  4453. }
  4454. - while (!(pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCS)) {
  4455. - enable_irq(osc->irq);
  4456. - wait_event(osc->wait,
  4457. - pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCS);
  4458. - }
  4459. + while (!clk_main_osc_ready(regmap))
  4460. + cpu_relax();
  4461. return 0;
  4462. }
  4463. @@ -106,9 +93,10 @@
  4464. static void clk_main_osc_unprepare(struct clk_hw *hw)
  4465. {
  4466. struct clk_main_osc *osc = to_clk_main_osc(hw);
  4467. - struct at91_pmc *pmc = osc->pmc;
  4468. - u32 tmp = pmc_read(pmc, AT91_CKGR_MOR);
  4469. + struct regmap *regmap = osc->regmap;
  4470. + u32 tmp;
  4471. + regmap_read(regmap, AT91_CKGR_MOR, &tmp);
  4472. if (tmp & AT91_PMC_OSCBYPASS)
  4473. return;
  4474. @@ -116,20 +104,22 @@
  4475. return;
  4476. tmp &= ~(AT91_PMC_KEY | AT91_PMC_MOSCEN);
  4477. - pmc_write(pmc, AT91_CKGR_MOR, tmp | AT91_PMC_KEY);
  4478. + regmap_write(regmap, AT91_CKGR_MOR, tmp | AT91_PMC_KEY);
  4479. }
  4480. static int clk_main_osc_is_prepared(struct clk_hw *hw)
  4481. {
  4482. struct clk_main_osc *osc = to_clk_main_osc(hw);
  4483. - struct at91_pmc *pmc = osc->pmc;
  4484. - u32 tmp = pmc_read(pmc, AT91_CKGR_MOR);
  4485. + struct regmap *regmap = osc->regmap;
  4486. + u32 tmp, status;
  4487. + regmap_read(regmap, AT91_CKGR_MOR, &tmp);
  4488. if (tmp & AT91_PMC_OSCBYPASS)
  4489. return 1;
  4490. - return !!((pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCS) &&
  4491. - (pmc_read(pmc, AT91_CKGR_MOR) & AT91_PMC_MOSCEN));
  4492. + regmap_read(regmap, AT91_PMC_SR, &status);
  4493. +
  4494. + return (status & AT91_PMC_MOSCS) && (tmp & AT91_PMC_MOSCEN);
  4495. }
  4496. static const struct clk_ops main_osc_ops = {
  4497. @@ -139,18 +129,16 @@
  4498. };
  4499. static struct clk * __init
  4500. -at91_clk_register_main_osc(struct at91_pmc *pmc,
  4501. - unsigned int irq,
  4502. +at91_clk_register_main_osc(struct regmap *regmap,
  4503. const char *name,
  4504. const char *parent_name,
  4505. bool bypass)
  4506. {
  4507. - int ret;
  4508. struct clk_main_osc *osc;
  4509. struct clk *clk = NULL;
  4510. struct clk_init_data init;
  4511. - if (!pmc || !irq || !name || !parent_name)
  4512. + if (!name || !parent_name)
  4513. return ERR_PTR(-EINVAL);
  4514. osc = kzalloc(sizeof(*osc), GFP_KERNEL);
  4515. @@ -164,85 +152,70 @@
  4516. init.flags = CLK_IGNORE_UNUSED;
  4517. osc->hw.init = &init;
  4518. - osc->pmc = pmc;
  4519. - osc->irq = irq;
  4520. -
  4521. - init_waitqueue_head(&osc->wait);
  4522. - irq_set_status_flags(osc->irq, IRQ_NOAUTOEN);
  4523. - ret = request_irq(osc->irq, clk_main_osc_irq_handler,
  4524. - IRQF_TRIGGER_HIGH, name, osc);
  4525. - if (ret) {
  4526. - kfree(osc);
  4527. - return ERR_PTR(ret);
  4528. - }
  4529. + osc->regmap = regmap;
  4530. if (bypass)
  4531. - pmc_write(pmc, AT91_CKGR_MOR,
  4532. - (pmc_read(pmc, AT91_CKGR_MOR) &
  4533. - ~(MOR_KEY_MASK | AT91_PMC_MOSCEN)) |
  4534. - AT91_PMC_OSCBYPASS | AT91_PMC_KEY);
  4535. + regmap_update_bits(regmap,
  4536. + AT91_CKGR_MOR, MOR_KEY_MASK |
  4537. + AT91_PMC_MOSCEN,
  4538. + AT91_PMC_OSCBYPASS | AT91_PMC_KEY);
  4539. clk = clk_register(NULL, &osc->hw);
  4540. - if (IS_ERR(clk)) {
  4541. - free_irq(irq, osc);
  4542. + if (IS_ERR(clk))
  4543. kfree(osc);
  4544. - }
  4545. return clk;
  4546. }
  4547. -void __init of_at91rm9200_clk_main_osc_setup(struct device_node *np,
  4548. - struct at91_pmc *pmc)
  4549. +static void __init of_at91rm9200_clk_main_osc_setup(struct device_node *np)
  4550. {
  4551. struct clk *clk;
  4552. - unsigned int irq;
  4553. const char *name = np->name;
  4554. const char *parent_name;
  4555. + struct regmap *regmap;
  4556. bool bypass;
  4557. of_property_read_string(np, "clock-output-names", &name);
  4558. bypass = of_property_read_bool(np, "atmel,osc-bypass");
  4559. parent_name = of_clk_get_parent_name(np, 0);
  4560. - irq = irq_of_parse_and_map(np, 0);
  4561. - if (!irq)
  4562. + regmap = syscon_node_to_regmap(of_get_parent(np));
  4563. + if (IS_ERR(regmap))
  4564. return;
  4565. - clk = at91_clk_register_main_osc(pmc, irq, name, parent_name, bypass);
  4566. + clk = at91_clk_register_main_osc(regmap, name, parent_name, bypass);
  4567. if (IS_ERR(clk))
  4568. return;
  4569. of_clk_add_provider(np, of_clk_src_simple_get, clk);
  4570. }
  4571. +CLK_OF_DECLARE(at91rm9200_clk_main_osc, "atmel,at91rm9200-clk-main-osc",
  4572. + of_at91rm9200_clk_main_osc_setup);
  4573. -static irqreturn_t clk_main_rc_osc_irq_handler(int irq, void *dev_id)
  4574. +static bool clk_main_rc_osc_ready(struct regmap *regmap)
  4575. {
  4576. - struct clk_main_rc_osc *osc = dev_id;
  4577. + unsigned int status;
  4578. - wake_up(&osc->wait);
  4579. - disable_irq_nosync(osc->irq);
  4580. + regmap_read(regmap, AT91_PMC_SR, &status);
  4581. - return IRQ_HANDLED;
  4582. + return status & AT91_PMC_MOSCRCS;
  4583. }
  4584. static int clk_main_rc_osc_prepare(struct clk_hw *hw)
  4585. {
  4586. struct clk_main_rc_osc *osc = to_clk_main_rc_osc(hw);
  4587. - struct at91_pmc *pmc = osc->pmc;
  4588. - u32 tmp;
  4589. + struct regmap *regmap = osc->regmap;
  4590. + unsigned int mor;
  4591. - tmp = pmc_read(pmc, AT91_CKGR_MOR) & ~MOR_KEY_MASK;
  4592. + regmap_read(regmap, AT91_CKGR_MOR, &mor);
  4593. - if (!(tmp & AT91_PMC_MOSCRCEN)) {
  4594. - tmp |= AT91_PMC_MOSCRCEN | AT91_PMC_KEY;
  4595. - pmc_write(pmc, AT91_CKGR_MOR, tmp);
  4596. - }
  4597. + if (!(mor & AT91_PMC_MOSCRCEN))
  4598. + regmap_update_bits(regmap, AT91_CKGR_MOR,
  4599. + MOR_KEY_MASK | AT91_PMC_MOSCRCEN,
  4600. + AT91_PMC_MOSCRCEN | AT91_PMC_KEY);
  4601. - while (!(pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCRCS)) {
  4602. - enable_irq(osc->irq);
  4603. - wait_event(osc->wait,
  4604. - pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCRCS);
  4605. - }
  4606. + while (!clk_main_rc_osc_ready(regmap))
  4607. + cpu_relax();
  4608. return 0;
  4609. }
  4610. @@ -250,23 +223,28 @@
  4611. static void clk_main_rc_osc_unprepare(struct clk_hw *hw)
  4612. {
  4613. struct clk_main_rc_osc *osc = to_clk_main_rc_osc(hw);
  4614. - struct at91_pmc *pmc = osc->pmc;
  4615. - u32 tmp = pmc_read(pmc, AT91_CKGR_MOR);
  4616. + struct regmap *regmap = osc->regmap;
  4617. + unsigned int mor;
  4618. - if (!(tmp & AT91_PMC_MOSCRCEN))
  4619. + regmap_read(regmap, AT91_CKGR_MOR, &mor);
  4620. +
  4621. + if (!(mor & AT91_PMC_MOSCRCEN))
  4622. return;
  4623. - tmp &= ~(MOR_KEY_MASK | AT91_PMC_MOSCRCEN);
  4624. - pmc_write(pmc, AT91_CKGR_MOR, tmp | AT91_PMC_KEY);
  4625. + regmap_update_bits(regmap, AT91_CKGR_MOR,
  4626. + MOR_KEY_MASK | AT91_PMC_MOSCRCEN, AT91_PMC_KEY);
  4627. }
  4628. static int clk_main_rc_osc_is_prepared(struct clk_hw *hw)
  4629. {
  4630. struct clk_main_rc_osc *osc = to_clk_main_rc_osc(hw);
  4631. - struct at91_pmc *pmc = osc->pmc;
  4632. + struct regmap *regmap = osc->regmap;
  4633. + unsigned int mor, status;
  4634. +
  4635. + regmap_read(regmap, AT91_CKGR_MOR, &mor);
  4636. + regmap_read(regmap, AT91_PMC_SR, &status);
  4637. - return !!((pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCRCS) &&
  4638. - (pmc_read(pmc, AT91_CKGR_MOR) & AT91_PMC_MOSCRCEN));
  4639. + return (mor & AT91_PMC_MOSCRCEN) && (status & AT91_PMC_MOSCRCS);
  4640. }
  4641. static unsigned long clk_main_rc_osc_recalc_rate(struct clk_hw *hw,
  4642. @@ -294,17 +272,15 @@
  4643. };
  4644. static struct clk * __init
  4645. -at91_clk_register_main_rc_osc(struct at91_pmc *pmc,
  4646. - unsigned int irq,
  4647. +at91_clk_register_main_rc_osc(struct regmap *regmap,
  4648. const char *name,
  4649. u32 frequency, u32 accuracy)
  4650. {
  4651. - int ret;
  4652. struct clk_main_rc_osc *osc;
  4653. struct clk *clk = NULL;
  4654. struct clk_init_data init;
  4655. - if (!pmc || !irq || !name || !frequency)
  4656. + if (!name || !frequency)
  4657. return ERR_PTR(-EINVAL);
  4658. osc = kzalloc(sizeof(*osc), GFP_KERNEL);
  4659. @@ -318,63 +294,53 @@
  4660. init.flags = CLK_IS_ROOT | CLK_IGNORE_UNUSED;
  4661. osc->hw.init = &init;
  4662. - osc->pmc = pmc;
  4663. - osc->irq = irq;
  4664. + osc->regmap = regmap;
  4665. osc->frequency = frequency;
  4666. osc->accuracy = accuracy;
  4667. - init_waitqueue_head(&osc->wait);
  4668. - irq_set_status_flags(osc->irq, IRQ_NOAUTOEN);
  4669. - ret = request_irq(osc->irq, clk_main_rc_osc_irq_handler,
  4670. - IRQF_TRIGGER_HIGH, name, osc);
  4671. - if (ret)
  4672. - return ERR_PTR(ret);
  4673. -
  4674. clk = clk_register(NULL, &osc->hw);
  4675. - if (IS_ERR(clk)) {
  4676. - free_irq(irq, osc);
  4677. + if (IS_ERR(clk))
  4678. kfree(osc);
  4679. - }
  4680. return clk;
  4681. }
  4682. -void __init of_at91sam9x5_clk_main_rc_osc_setup(struct device_node *np,
  4683. - struct at91_pmc *pmc)
  4684. +static void __init of_at91sam9x5_clk_main_rc_osc_setup(struct device_node *np)
  4685. {
  4686. struct clk *clk;
  4687. - unsigned int irq;
  4688. u32 frequency = 0;
  4689. u32 accuracy = 0;
  4690. const char *name = np->name;
  4691. + struct regmap *regmap;
  4692. of_property_read_string(np, "clock-output-names", &name);
  4693. of_property_read_u32(np, "clock-frequency", &frequency);
  4694. of_property_read_u32(np, "clock-accuracy", &accuracy);
  4695. - irq = irq_of_parse_and_map(np, 0);
  4696. - if (!irq)
  4697. + regmap = syscon_node_to_regmap(of_get_parent(np));
  4698. + if (IS_ERR(regmap))
  4699. return;
  4700. - clk = at91_clk_register_main_rc_osc(pmc, irq, name, frequency,
  4701. - accuracy);
  4702. + clk = at91_clk_register_main_rc_osc(regmap, name, frequency, accuracy);
  4703. if (IS_ERR(clk))
  4704. return;
  4705. of_clk_add_provider(np, of_clk_src_simple_get, clk);
  4706. }
  4707. +CLK_OF_DECLARE(at91sam9x5_clk_main_rc_osc, "atmel,at91sam9x5-clk-main-rc-osc",
  4708. + of_at91sam9x5_clk_main_rc_osc_setup);
  4709. -static int clk_main_probe_frequency(struct at91_pmc *pmc)
  4710. +static int clk_main_probe_frequency(struct regmap *regmap)
  4711. {
  4712. unsigned long prep_time, timeout;
  4713. - u32 tmp;
  4714. + unsigned int mcfr;
  4715. timeout = jiffies + usecs_to_jiffies(MAINFRDY_TIMEOUT);
  4716. do {
  4717. prep_time = jiffies;
  4718. - tmp = pmc_read(pmc, AT91_CKGR_MCFR);
  4719. - if (tmp & AT91_PMC_MAINRDY)
  4720. + regmap_read(regmap, AT91_CKGR_MCFR, &mcfr);
  4721. + if (mcfr & AT91_PMC_MAINRDY)
  4722. return 0;
  4723. usleep_range(MAINF_LOOP_MIN_WAIT, MAINF_LOOP_MAX_WAIT);
  4724. } while (time_before(prep_time, timeout));
  4725. @@ -382,34 +348,37 @@
  4726. return -ETIMEDOUT;
  4727. }
  4728. -static unsigned long clk_main_recalc_rate(struct at91_pmc *pmc,
  4729. +static unsigned long clk_main_recalc_rate(struct regmap *regmap,
  4730. unsigned long parent_rate)
  4731. {
  4732. - u32 tmp;
  4733. + unsigned int mcfr;
  4734. if (parent_rate)
  4735. return parent_rate;
  4736. pr_warn("Main crystal frequency not set, using approximate value\n");
  4737. - tmp = pmc_read(pmc, AT91_CKGR_MCFR);
  4738. - if (!(tmp & AT91_PMC_MAINRDY))
  4739. + regmap_read(regmap, AT91_CKGR_MCFR, &mcfr);
  4740. + if (!(mcfr & AT91_PMC_MAINRDY))
  4741. return 0;
  4742. - return ((tmp & AT91_PMC_MAINF) * SLOW_CLOCK_FREQ) / MAINF_DIV;
  4743. + return ((mcfr & AT91_PMC_MAINF) * SLOW_CLOCK_FREQ) / MAINF_DIV;
  4744. }
  4745. static int clk_rm9200_main_prepare(struct clk_hw *hw)
  4746. {
  4747. struct clk_rm9200_main *clkmain = to_clk_rm9200_main(hw);
  4748. - return clk_main_probe_frequency(clkmain->pmc);
  4749. + return clk_main_probe_frequency(clkmain->regmap);
  4750. }
  4751. static int clk_rm9200_main_is_prepared(struct clk_hw *hw)
  4752. {
  4753. struct clk_rm9200_main *clkmain = to_clk_rm9200_main(hw);
  4754. + unsigned int status;
  4755. +
  4756. + regmap_read(clkmain->regmap, AT91_CKGR_MCFR, &status);
  4757. - return !!(pmc_read(clkmain->pmc, AT91_CKGR_MCFR) & AT91_PMC_MAINRDY);
  4758. + return status & AT91_PMC_MAINRDY ? 1 : 0;
  4759. }
  4760. static unsigned long clk_rm9200_main_recalc_rate(struct clk_hw *hw,
  4761. @@ -417,7 +386,7 @@
  4762. {
  4763. struct clk_rm9200_main *clkmain = to_clk_rm9200_main(hw);
  4764. - return clk_main_recalc_rate(clkmain->pmc, parent_rate);
  4765. + return clk_main_recalc_rate(clkmain->regmap, parent_rate);
  4766. }
  4767. static const struct clk_ops rm9200_main_ops = {
  4768. @@ -427,7 +396,7 @@
  4769. };
  4770. static struct clk * __init
  4771. -at91_clk_register_rm9200_main(struct at91_pmc *pmc,
  4772. +at91_clk_register_rm9200_main(struct regmap *regmap,
  4773. const char *name,
  4774. const char *parent_name)
  4775. {
  4776. @@ -435,7 +404,7 @@
  4777. struct clk *clk = NULL;
  4778. struct clk_init_data init;
  4779. - if (!pmc || !name)
  4780. + if (!name)
  4781. return ERR_PTR(-EINVAL);
  4782. if (!parent_name)
  4783. @@ -452,7 +421,7 @@
  4784. init.flags = 0;
  4785. clkmain->hw.init = &init;
  4786. - clkmain->pmc = pmc;
  4787. + clkmain->regmap = regmap;
  4788. clk = clk_register(NULL, &clkmain->hw);
  4789. if (IS_ERR(clk))
  4790. @@ -461,52 +430,54 @@
  4791. return clk;
  4792. }
  4793. -void __init of_at91rm9200_clk_main_setup(struct device_node *np,
  4794. - struct at91_pmc *pmc)
  4795. +static void __init of_at91rm9200_clk_main_setup(struct device_node *np)
  4796. {
  4797. struct clk *clk;
  4798. const char *parent_name;
  4799. const char *name = np->name;
  4800. + struct regmap *regmap;
  4801. parent_name = of_clk_get_parent_name(np, 0);
  4802. of_property_read_string(np, "clock-output-names", &name);
  4803. - clk = at91_clk_register_rm9200_main(pmc, name, parent_name);
  4804. + regmap = syscon_node_to_regmap(of_get_parent(np));
  4805. + if (IS_ERR(regmap))
  4806. + return;
  4807. +
  4808. + clk = at91_clk_register_rm9200_main(regmap, name, parent_name);
  4809. if (IS_ERR(clk))
  4810. return;
  4811. of_clk_add_provider(np, of_clk_src_simple_get, clk);
  4812. }
  4813. +CLK_OF_DECLARE(at91rm9200_clk_main, "atmel,at91rm9200-clk-main",
  4814. + of_at91rm9200_clk_main_setup);
  4815. -static irqreturn_t clk_sam9x5_main_irq_handler(int irq, void *dev_id)
  4816. +static inline bool clk_sam9x5_main_ready(struct regmap *regmap)
  4817. {
  4818. - struct clk_sam9x5_main *clkmain = dev_id;
  4819. + unsigned int status;
  4820. - wake_up(&clkmain->wait);
  4821. - disable_irq_nosync(clkmain->irq);
  4822. + regmap_read(regmap, AT91_PMC_SR, &status);
  4823. - return IRQ_HANDLED;
  4824. + return status & AT91_PMC_MOSCSELS ? 1 : 0;
  4825. }
  4826. static int clk_sam9x5_main_prepare(struct clk_hw *hw)
  4827. {
  4828. struct clk_sam9x5_main *clkmain = to_clk_sam9x5_main(hw);
  4829. - struct at91_pmc *pmc = clkmain->pmc;
  4830. + struct regmap *regmap = clkmain->regmap;
  4831. - while (!(pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCSELS)) {
  4832. - enable_irq(clkmain->irq);
  4833. - wait_event(clkmain->wait,
  4834. - pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCSELS);
  4835. - }
  4836. + while (!clk_sam9x5_main_ready(regmap))
  4837. + cpu_relax();
  4838. - return clk_main_probe_frequency(pmc);
  4839. + return clk_main_probe_frequency(regmap);
  4840. }
  4841. static int clk_sam9x5_main_is_prepared(struct clk_hw *hw)
  4842. {
  4843. struct clk_sam9x5_main *clkmain = to_clk_sam9x5_main(hw);
  4844. - return !!(pmc_read(clkmain->pmc, AT91_PMC_SR) & AT91_PMC_MOSCSELS);
  4845. + return clk_sam9x5_main_ready(clkmain->regmap);
  4846. }
  4847. static unsigned long clk_sam9x5_main_recalc_rate(struct clk_hw *hw,
  4848. @@ -514,30 +485,28 @@
  4849. {
  4850. struct clk_sam9x5_main *clkmain = to_clk_sam9x5_main(hw);
  4851. - return clk_main_recalc_rate(clkmain->pmc, parent_rate);
  4852. + return clk_main_recalc_rate(clkmain->regmap, parent_rate);
  4853. }
  4854. static int clk_sam9x5_main_set_parent(struct clk_hw *hw, u8 index)
  4855. {
  4856. struct clk_sam9x5_main *clkmain = to_clk_sam9x5_main(hw);
  4857. - struct at91_pmc *pmc = clkmain->pmc;
  4858. - u32 tmp;
  4859. + struct regmap *regmap = clkmain->regmap;
  4860. + unsigned int tmp;
  4861. if (index > 1)
  4862. return -EINVAL;
  4863. - tmp = pmc_read(pmc, AT91_CKGR_MOR) & ~MOR_KEY_MASK;
  4864. + regmap_read(regmap, AT91_CKGR_MOR, &tmp);
  4865. + tmp &= ~MOR_KEY_MASK;
  4866. if (index && !(tmp & AT91_PMC_MOSCSEL))
  4867. - pmc_write(pmc, AT91_CKGR_MOR, tmp | AT91_PMC_MOSCSEL);
  4868. + regmap_write(regmap, AT91_CKGR_MOR, tmp | AT91_PMC_MOSCSEL);
  4869. else if (!index && (tmp & AT91_PMC_MOSCSEL))
  4870. - pmc_write(pmc, AT91_CKGR_MOR, tmp & ~AT91_PMC_MOSCSEL);
  4871. + regmap_write(regmap, AT91_CKGR_MOR, tmp & ~AT91_PMC_MOSCSEL);
  4872. - while (!(pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCSELS)) {
  4873. - enable_irq(clkmain->irq);
  4874. - wait_event(clkmain->wait,
  4875. - pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCSELS);
  4876. - }
  4877. + while (!clk_sam9x5_main_ready(regmap))
  4878. + cpu_relax();
  4879. return 0;
  4880. }
  4881. @@ -545,8 +514,11 @@
  4882. static u8 clk_sam9x5_main_get_parent(struct clk_hw *hw)
  4883. {
  4884. struct clk_sam9x5_main *clkmain = to_clk_sam9x5_main(hw);
  4885. + unsigned int status;
  4886. +
  4887. + regmap_read(clkmain->regmap, AT91_CKGR_MOR, &status);
  4888. - return !!(pmc_read(clkmain->pmc, AT91_CKGR_MOR) & AT91_PMC_MOSCEN);
  4889. + return status & AT91_PMC_MOSCEN ? 1 : 0;
  4890. }
  4891. static const struct clk_ops sam9x5_main_ops = {
  4892. @@ -558,18 +530,17 @@
  4893. };
  4894. static struct clk * __init
  4895. -at91_clk_register_sam9x5_main(struct at91_pmc *pmc,
  4896. - unsigned int irq,
  4897. +at91_clk_register_sam9x5_main(struct regmap *regmap,
  4898. const char *name,
  4899. const char **parent_names,
  4900. int num_parents)
  4901. {
  4902. - int ret;
  4903. struct clk_sam9x5_main *clkmain;
  4904. struct clk *clk = NULL;
  4905. struct clk_init_data init;
  4906. + unsigned int status;
  4907. - if (!pmc || !irq || !name)
  4908. + if (!name)
  4909. return ERR_PTR(-EINVAL);
  4910. if (!parent_names || !num_parents)
  4911. @@ -586,51 +557,42 @@
  4912. init.flags = CLK_SET_PARENT_GATE;
  4913. clkmain->hw.init = &init;
  4914. - clkmain->pmc = pmc;
  4915. - clkmain->irq = irq;
  4916. - clkmain->parent = !!(pmc_read(clkmain->pmc, AT91_CKGR_MOR) &
  4917. - AT91_PMC_MOSCEN);
  4918. - init_waitqueue_head(&clkmain->wait);
  4919. - irq_set_status_flags(clkmain->irq, IRQ_NOAUTOEN);
  4920. - ret = request_irq(clkmain->irq, clk_sam9x5_main_irq_handler,
  4921. - IRQF_TRIGGER_HIGH, name, clkmain);
  4922. - if (ret)
  4923. - return ERR_PTR(ret);
  4924. + clkmain->regmap = regmap;
  4925. + regmap_read(clkmain->regmap, AT91_CKGR_MOR, &status);
  4926. + clkmain->parent = status & AT91_PMC_MOSCEN ? 1 : 0;
  4927. clk = clk_register(NULL, &clkmain->hw);
  4928. - if (IS_ERR(clk)) {
  4929. - free_irq(clkmain->irq, clkmain);
  4930. + if (IS_ERR(clk))
  4931. kfree(clkmain);
  4932. - }
  4933. return clk;
  4934. }
  4935. -void __init of_at91sam9x5_clk_main_setup(struct device_node *np,
  4936. - struct at91_pmc *pmc)
  4937. +static void __init of_at91sam9x5_clk_main_setup(struct device_node *np)
  4938. {
  4939. struct clk *clk;
  4940. const char *parent_names[2];
  4941. int num_parents;
  4942. - unsigned int irq;
  4943. const char *name = np->name;
  4944. + struct regmap *regmap;
  4945. num_parents = of_clk_get_parent_count(np);
  4946. if (num_parents <= 0 || num_parents > 2)
  4947. return;
  4948. of_clk_parent_fill(np, parent_names, num_parents);
  4949. + regmap = syscon_node_to_regmap(of_get_parent(np));
  4950. + if (IS_ERR(regmap))
  4951. + return;
  4952. of_property_read_string(np, "clock-output-names", &name);
  4953. - irq = irq_of_parse_and_map(np, 0);
  4954. - if (!irq)
  4955. - return;
  4956. -
  4957. - clk = at91_clk_register_sam9x5_main(pmc, irq, name, parent_names,
  4958. + clk = at91_clk_register_sam9x5_main(regmap, name, parent_names,
  4959. num_parents);
  4960. if (IS_ERR(clk))
  4961. return;
  4962. of_clk_add_provider(np, of_clk_src_simple_get, clk);
  4963. }
  4964. +CLK_OF_DECLARE(at91sam9x5_clk_main, "atmel,at91sam9x5-clk-main",
  4965. + of_at91sam9x5_clk_main_setup);
  4966. diff -Nur linux-4.4.62.orig/drivers/clk/at91/clk-master.c linux-4.4.62/drivers/clk/at91/clk-master.c
  4967. --- linux-4.4.62.orig/drivers/clk/at91/clk-master.c 2017-04-18 07:15:37.000000000 +0200
  4968. +++ linux-4.4.62/drivers/clk/at91/clk-master.c 2017-04-18 17:38:08.042643350 +0200
  4969. @@ -12,13 +12,8 @@
  4970. #include <linux/clkdev.h>
  4971. #include <linux/clk/at91_pmc.h>
  4972. #include <linux/of.h>
  4973. -#include <linux/of_address.h>
  4974. -#include <linux/of_irq.h>
  4975. -#include <linux/io.h>
  4976. -#include <linux/wait.h>
  4977. -#include <linux/sched.h>
  4978. -#include <linux/interrupt.h>
  4979. -#include <linux/irq.h>
  4980. +#include <linux/mfd/syscon.h>
  4981. +#include <linux/regmap.h>
  4982. #include "pmc.h"
  4983. @@ -44,32 +39,26 @@
  4984. struct clk_master {
  4985. struct clk_hw hw;
  4986. - struct at91_pmc *pmc;
  4987. - unsigned int irq;
  4988. - wait_queue_head_t wait;
  4989. + struct regmap *regmap;
  4990. const struct clk_master_layout *layout;
  4991. const struct clk_master_characteristics *characteristics;
  4992. };
  4993. -static irqreturn_t clk_master_irq_handler(int irq, void *dev_id)
  4994. +static inline bool clk_master_ready(struct regmap *regmap)
  4995. {
  4996. - struct clk_master *master = (struct clk_master *)dev_id;
  4997. + unsigned int status;
  4998. - wake_up(&master->wait);
  4999. - disable_irq_nosync(master->irq);
  5000. + regmap_read(regmap, AT91_PMC_SR, &status);
  5001. - return IRQ_HANDLED;
  5002. + return status & AT91_PMC_MCKRDY ? 1 : 0;
  5003. }
  5004. +
  5005. static int clk_master_prepare(struct clk_hw *hw)
  5006. {
  5007. struct clk_master *master = to_clk_master(hw);
  5008. - struct at91_pmc *pmc = master->pmc;
  5009. - while (!(pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MCKRDY)) {
  5010. - enable_irq(master->irq);
  5011. - wait_event(master->wait,
  5012. - pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MCKRDY);
  5013. - }
  5014. + while (!clk_master_ready(master->regmap))
  5015. + cpu_relax();
  5016. return 0;
  5017. }
  5018. @@ -78,7 +67,7 @@
  5019. {
  5020. struct clk_master *master = to_clk_master(hw);
  5021. - return !!(pmc_read(master->pmc, AT91_PMC_SR) & AT91_PMC_MCKRDY);
  5022. + return clk_master_ready(master->regmap);
  5023. }
  5024. static unsigned long clk_master_recalc_rate(struct clk_hw *hw,
  5025. @@ -88,18 +77,16 @@
  5026. u8 div;
  5027. unsigned long rate = parent_rate;
  5028. struct clk_master *master = to_clk_master(hw);
  5029. - struct at91_pmc *pmc = master->pmc;
  5030. const struct clk_master_layout *layout = master->layout;
  5031. const struct clk_master_characteristics *characteristics =
  5032. master->characteristics;
  5033. - u32 tmp;
  5034. + unsigned int mckr;
  5035. - pmc_lock(pmc);
  5036. - tmp = pmc_read(pmc, AT91_PMC_MCKR) & layout->mask;
  5037. - pmc_unlock(pmc);
  5038. + regmap_read(master->regmap, AT91_PMC_MCKR, &mckr);
  5039. + mckr &= layout->mask;
  5040. - pres = (tmp >> layout->pres_shift) & MASTER_PRES_MASK;
  5041. - div = (tmp >> MASTER_DIV_SHIFT) & MASTER_DIV_MASK;
  5042. + pres = (mckr >> layout->pres_shift) & MASTER_PRES_MASK;
  5043. + div = (mckr >> MASTER_DIV_SHIFT) & MASTER_DIV_MASK;
  5044. if (characteristics->have_div3_pres && pres == MASTER_PRES_MAX)
  5045. rate /= 3;
  5046. @@ -119,9 +106,11 @@
  5047. static u8 clk_master_get_parent(struct clk_hw *hw)
  5048. {
  5049. struct clk_master *master = to_clk_master(hw);
  5050. - struct at91_pmc *pmc = master->pmc;
  5051. + unsigned int mckr;
  5052. - return pmc_read(pmc, AT91_PMC_MCKR) & AT91_PMC_CSS;
  5053. + regmap_read(master->regmap, AT91_PMC_MCKR, &mckr);
  5054. +
  5055. + return mckr & AT91_PMC_CSS;
  5056. }
  5057. static const struct clk_ops master_ops = {
  5058. @@ -132,18 +121,17 @@
  5059. };
  5060. static struct clk * __init
  5061. -at91_clk_register_master(struct at91_pmc *pmc, unsigned int irq,
  5062. +at91_clk_register_master(struct regmap *regmap,
  5063. const char *name, int num_parents,
  5064. const char **parent_names,
  5065. const struct clk_master_layout *layout,
  5066. const struct clk_master_characteristics *characteristics)
  5067. {
  5068. - int ret;
  5069. struct clk_master *master;
  5070. struct clk *clk = NULL;
  5071. struct clk_init_data init;
  5072. - if (!pmc || !irq || !name || !num_parents || !parent_names)
  5073. + if (!name || !num_parents || !parent_names)
  5074. return ERR_PTR(-EINVAL);
  5075. master = kzalloc(sizeof(*master), GFP_KERNEL);
  5076. @@ -159,20 +147,10 @@
  5077. master->hw.init = &init;
  5078. master->layout = layout;
  5079. master->characteristics = characteristics;
  5080. - master->pmc = pmc;
  5081. - master->irq = irq;
  5082. - init_waitqueue_head(&master->wait);
  5083. - irq_set_status_flags(master->irq, IRQ_NOAUTOEN);
  5084. - ret = request_irq(master->irq, clk_master_irq_handler,
  5085. - IRQF_TRIGGER_HIGH, "clk-master", master);
  5086. - if (ret) {
  5087. - kfree(master);
  5088. - return ERR_PTR(ret);
  5089. - }
  5090. + master->regmap = regmap;
  5091. clk = clk_register(NULL, &master->hw);
  5092. if (IS_ERR(clk)) {
  5093. - free_irq(master->irq, master);
  5094. kfree(master);
  5095. }
  5096. @@ -217,15 +195,15 @@
  5097. }
  5098. static void __init
  5099. -of_at91_clk_master_setup(struct device_node *np, struct at91_pmc *pmc,
  5100. +of_at91_clk_master_setup(struct device_node *np,
  5101. const struct clk_master_layout *layout)
  5102. {
  5103. struct clk *clk;
  5104. int num_parents;
  5105. - unsigned int irq;
  5106. const char *parent_names[MASTER_SOURCE_MAX];
  5107. const char *name = np->name;
  5108. struct clk_master_characteristics *characteristics;
  5109. + struct regmap *regmap;
  5110. num_parents = of_clk_get_parent_count(np);
  5111. if (num_parents <= 0 || num_parents > MASTER_SOURCE_MAX)
  5112. @@ -239,11 +217,11 @@
  5113. if (!characteristics)
  5114. return;
  5115. - irq = irq_of_parse_and_map(np, 0);
  5116. - if (!irq)
  5117. - goto out_free_characteristics;
  5118. + regmap = syscon_node_to_regmap(of_get_parent(np));
  5119. + if (IS_ERR(regmap))
  5120. + return;
  5121. - clk = at91_clk_register_master(pmc, irq, name, num_parents,
  5122. + clk = at91_clk_register_master(regmap, name, num_parents,
  5123. parent_names, layout,
  5124. characteristics);
  5125. if (IS_ERR(clk))
  5126. @@ -256,14 +234,16 @@
  5127. kfree(characteristics);
  5128. }
  5129. -void __init of_at91rm9200_clk_master_setup(struct device_node *np,
  5130. - struct at91_pmc *pmc)
  5131. +static void __init of_at91rm9200_clk_master_setup(struct device_node *np)
  5132. {
  5133. - of_at91_clk_master_setup(np, pmc, &at91rm9200_master_layout);
  5134. + of_at91_clk_master_setup(np, &at91rm9200_master_layout);
  5135. }
  5136. +CLK_OF_DECLARE(at91rm9200_clk_master, "atmel,at91rm9200-clk-master",
  5137. + of_at91rm9200_clk_master_setup);
  5138. -void __init of_at91sam9x5_clk_master_setup(struct device_node *np,
  5139. - struct at91_pmc *pmc)
  5140. +static void __init of_at91sam9x5_clk_master_setup(struct device_node *np)
  5141. {
  5142. - of_at91_clk_master_setup(np, pmc, &at91sam9x5_master_layout);
  5143. + of_at91_clk_master_setup(np, &at91sam9x5_master_layout);
  5144. }
  5145. +CLK_OF_DECLARE(at91sam9x5_clk_master, "atmel,at91sam9x5-clk-master",
  5146. + of_at91sam9x5_clk_master_setup);
  5147. diff -Nur linux-4.4.62.orig/drivers/clk/at91/clk-peripheral.c linux-4.4.62/drivers/clk/at91/clk-peripheral.c
  5148. --- linux-4.4.62.orig/drivers/clk/at91/clk-peripheral.c 2017-04-18 07:15:37.000000000 +0200
  5149. +++ linux-4.4.62/drivers/clk/at91/clk-peripheral.c 2017-04-18 17:38:08.042643350 +0200
  5150. @@ -12,11 +12,13 @@
  5151. #include <linux/clkdev.h>
  5152. #include <linux/clk/at91_pmc.h>
  5153. #include <linux/of.h>
  5154. -#include <linux/of_address.h>
  5155. -#include <linux/io.h>
  5156. +#include <linux/mfd/syscon.h>
  5157. +#include <linux/regmap.h>
  5158. #include "pmc.h"
  5159. +DEFINE_SPINLOCK(pmc_pcr_lock);
  5160. +
  5161. #define PERIPHERAL_MAX 64
  5162. #define PERIPHERAL_AT91RM9200 0
  5163. @@ -33,7 +35,7 @@
  5164. struct clk_peripheral {
  5165. struct clk_hw hw;
  5166. - struct at91_pmc *pmc;
  5167. + struct regmap *regmap;
  5168. u32 id;
  5169. };
  5170. @@ -41,8 +43,9 @@
  5171. struct clk_sam9x5_peripheral {
  5172. struct clk_hw hw;
  5173. - struct at91_pmc *pmc;
  5174. + struct regmap *regmap;
  5175. struct clk_range range;
  5176. + spinlock_t *lock;
  5177. u32 id;
  5178. u32 div;
  5179. bool auto_div;
  5180. @@ -54,7 +57,6 @@
  5181. static int clk_peripheral_enable(struct clk_hw *hw)
  5182. {
  5183. struct clk_peripheral *periph = to_clk_peripheral(hw);
  5184. - struct at91_pmc *pmc = periph->pmc;
  5185. int offset = AT91_PMC_PCER;
  5186. u32 id = periph->id;
  5187. @@ -62,14 +64,14 @@
  5188. return 0;
  5189. if (id > PERIPHERAL_ID_MAX)
  5190. offset = AT91_PMC_PCER1;
  5191. - pmc_write(pmc, offset, PERIPHERAL_MASK(id));
  5192. + regmap_write(periph->regmap, offset, PERIPHERAL_MASK(id));
  5193. +
  5194. return 0;
  5195. }
  5196. static void clk_peripheral_disable(struct clk_hw *hw)
  5197. {
  5198. struct clk_peripheral *periph = to_clk_peripheral(hw);
  5199. - struct at91_pmc *pmc = periph->pmc;
  5200. int offset = AT91_PMC_PCDR;
  5201. u32 id = periph->id;
  5202. @@ -77,21 +79,23 @@
  5203. return;
  5204. if (id > PERIPHERAL_ID_MAX)
  5205. offset = AT91_PMC_PCDR1;
  5206. - pmc_write(pmc, offset, PERIPHERAL_MASK(id));
  5207. + regmap_write(periph->regmap, offset, PERIPHERAL_MASK(id));
  5208. }
  5209. static int clk_peripheral_is_enabled(struct clk_hw *hw)
  5210. {
  5211. struct clk_peripheral *periph = to_clk_peripheral(hw);
  5212. - struct at91_pmc *pmc = periph->pmc;
  5213. int offset = AT91_PMC_PCSR;
  5214. + unsigned int status;
  5215. u32 id = periph->id;
  5216. if (id < PERIPHERAL_ID_MIN)
  5217. return 1;
  5218. if (id > PERIPHERAL_ID_MAX)
  5219. offset = AT91_PMC_PCSR1;
  5220. - return !!(pmc_read(pmc, offset) & PERIPHERAL_MASK(id));
  5221. + regmap_read(periph->regmap, offset, &status);
  5222. +
  5223. + return status & PERIPHERAL_MASK(id) ? 1 : 0;
  5224. }
  5225. static const struct clk_ops peripheral_ops = {
  5226. @@ -101,14 +105,14 @@
  5227. };
  5228. static struct clk * __init
  5229. -at91_clk_register_peripheral(struct at91_pmc *pmc, const char *name,
  5230. +at91_clk_register_peripheral(struct regmap *regmap, const char *name,
  5231. const char *parent_name, u32 id)
  5232. {
  5233. struct clk_peripheral *periph;
  5234. struct clk *clk = NULL;
  5235. struct clk_init_data init;
  5236. - if (!pmc || !name || !parent_name || id > PERIPHERAL_ID_MAX)
  5237. + if (!name || !parent_name || id > PERIPHERAL_ID_MAX)
  5238. return ERR_PTR(-EINVAL);
  5239. periph = kzalloc(sizeof(*periph), GFP_KERNEL);
  5240. @@ -123,7 +127,7 @@
  5241. periph->id = id;
  5242. periph->hw.init = &init;
  5243. - periph->pmc = pmc;
  5244. + periph->regmap = regmap;
  5245. clk = clk_register(NULL, &periph->hw);
  5246. if (IS_ERR(clk))
  5247. @@ -160,53 +164,58 @@
  5248. static int clk_sam9x5_peripheral_enable(struct clk_hw *hw)
  5249. {
  5250. struct clk_sam9x5_peripheral *periph = to_clk_sam9x5_peripheral(hw);
  5251. - struct at91_pmc *pmc = periph->pmc;
  5252. - u32 tmp;
  5253. + unsigned long flags;
  5254. if (periph->id < PERIPHERAL_ID_MIN)
  5255. return 0;
  5256. - pmc_lock(pmc);
  5257. - pmc_write(pmc, AT91_PMC_PCR, (periph->id & AT91_PMC_PCR_PID_MASK));
  5258. - tmp = pmc_read(pmc, AT91_PMC_PCR) & ~AT91_PMC_PCR_DIV_MASK;
  5259. - pmc_write(pmc, AT91_PMC_PCR, tmp | AT91_PMC_PCR_DIV(periph->div)
  5260. - | AT91_PMC_PCR_CMD
  5261. - | AT91_PMC_PCR_EN);
  5262. - pmc_unlock(pmc);
  5263. + spin_lock_irqsave(periph->lock, flags);
  5264. + regmap_write(periph->regmap, AT91_PMC_PCR,
  5265. + (periph->id & AT91_PMC_PCR_PID_MASK));
  5266. + regmap_update_bits(periph->regmap, AT91_PMC_PCR,
  5267. + AT91_PMC_PCR_DIV_MASK | AT91_PMC_PCR_CMD |
  5268. + AT91_PMC_PCR_EN,
  5269. + AT91_PMC_PCR_DIV(periph->div) |
  5270. + AT91_PMC_PCR_CMD |
  5271. + AT91_PMC_PCR_EN);
  5272. + spin_unlock_irqrestore(periph->lock, flags);
  5273. +
  5274. return 0;
  5275. }
  5276. static void clk_sam9x5_peripheral_disable(struct clk_hw *hw)
  5277. {
  5278. struct clk_sam9x5_peripheral *periph = to_clk_sam9x5_peripheral(hw);
  5279. - struct at91_pmc *pmc = periph->pmc;
  5280. - u32 tmp;
  5281. + unsigned long flags;
  5282. if (periph->id < PERIPHERAL_ID_MIN)
  5283. return;
  5284. - pmc_lock(pmc);
  5285. - pmc_write(pmc, AT91_PMC_PCR, (periph->id & AT91_PMC_PCR_PID_MASK));
  5286. - tmp = pmc_read(pmc, AT91_PMC_PCR) & ~AT91_PMC_PCR_EN;
  5287. - pmc_write(pmc, AT91_PMC_PCR, tmp | AT91_PMC_PCR_CMD);
  5288. - pmc_unlock(pmc);
  5289. + spin_lock_irqsave(periph->lock, flags);
  5290. + regmap_write(periph->regmap, AT91_PMC_PCR,
  5291. + (periph->id & AT91_PMC_PCR_PID_MASK));
  5292. + regmap_update_bits(periph->regmap, AT91_PMC_PCR,
  5293. + AT91_PMC_PCR_EN | AT91_PMC_PCR_CMD,
  5294. + AT91_PMC_PCR_CMD);
  5295. + spin_unlock_irqrestore(periph->lock, flags);
  5296. }
  5297. static int clk_sam9x5_peripheral_is_enabled(struct clk_hw *hw)
  5298. {
  5299. struct clk_sam9x5_peripheral *periph = to_clk_sam9x5_peripheral(hw);
  5300. - struct at91_pmc *pmc = periph->pmc;
  5301. - int ret;
  5302. + unsigned long flags;
  5303. + unsigned int status;
  5304. if (periph->id < PERIPHERAL_ID_MIN)
  5305. return 1;
  5306. - pmc_lock(pmc);
  5307. - pmc_write(pmc, AT91_PMC_PCR, (periph->id & AT91_PMC_PCR_PID_MASK));
  5308. - ret = !!(pmc_read(pmc, AT91_PMC_PCR) & AT91_PMC_PCR_EN);
  5309. - pmc_unlock(pmc);
  5310. + spin_lock_irqsave(periph->lock, flags);
  5311. + regmap_write(periph->regmap, AT91_PMC_PCR,
  5312. + (periph->id & AT91_PMC_PCR_PID_MASK));
  5313. + regmap_read(periph->regmap, AT91_PMC_PCR, &status);
  5314. + spin_unlock_irqrestore(periph->lock, flags);
  5315. - return ret;
  5316. + return status & AT91_PMC_PCR_EN ? 1 : 0;
  5317. }
  5318. static unsigned long
  5319. @@ -214,19 +223,20 @@
  5320. unsigned long parent_rate)
  5321. {
  5322. struct clk_sam9x5_peripheral *periph = to_clk_sam9x5_peripheral(hw);
  5323. - struct at91_pmc *pmc = periph->pmc;
  5324. - u32 tmp;
  5325. + unsigned long flags;
  5326. + unsigned int status;
  5327. if (periph->id < PERIPHERAL_ID_MIN)
  5328. return parent_rate;
  5329. - pmc_lock(pmc);
  5330. - pmc_write(pmc, AT91_PMC_PCR, (periph->id & AT91_PMC_PCR_PID_MASK));
  5331. - tmp = pmc_read(pmc, AT91_PMC_PCR);
  5332. - pmc_unlock(pmc);
  5333. + spin_lock_irqsave(periph->lock, flags);
  5334. + regmap_write(periph->regmap, AT91_PMC_PCR,
  5335. + (periph->id & AT91_PMC_PCR_PID_MASK));
  5336. + regmap_read(periph->regmap, AT91_PMC_PCR, &status);
  5337. + spin_unlock_irqrestore(periph->lock, flags);
  5338. - if (tmp & AT91_PMC_PCR_EN) {
  5339. - periph->div = PERIPHERAL_RSHIFT(tmp);
  5340. + if (status & AT91_PMC_PCR_EN) {
  5341. + periph->div = PERIPHERAL_RSHIFT(status);
  5342. periph->auto_div = false;
  5343. } else {
  5344. clk_sam9x5_peripheral_autodiv(periph);
  5345. @@ -318,15 +328,15 @@
  5346. };
  5347. static struct clk * __init
  5348. -at91_clk_register_sam9x5_peripheral(struct at91_pmc *pmc, const char *name,
  5349. - const char *parent_name, u32 id,
  5350. - const struct clk_range *range)
  5351. +at91_clk_register_sam9x5_peripheral(struct regmap *regmap, spinlock_t *lock,
  5352. + const char *name, const char *parent_name,
  5353. + u32 id, const struct clk_range *range)
  5354. {
  5355. struct clk_sam9x5_peripheral *periph;
  5356. struct clk *clk = NULL;
  5357. struct clk_init_data init;
  5358. - if (!pmc || !name || !parent_name)
  5359. + if (!name || !parent_name)
  5360. return ERR_PTR(-EINVAL);
  5361. periph = kzalloc(sizeof(*periph), GFP_KERNEL);
  5362. @@ -342,7 +352,8 @@
  5363. periph->id = id;
  5364. periph->hw.init = &init;
  5365. periph->div = 0;
  5366. - periph->pmc = pmc;
  5367. + periph->regmap = regmap;
  5368. + periph->lock = lock;
  5369. periph->auto_div = true;
  5370. periph->range = *range;
  5371. @@ -356,7 +367,7 @@
  5372. }
  5373. static void __init
  5374. -of_at91_clk_periph_setup(struct device_node *np, struct at91_pmc *pmc, u8 type)
  5375. +of_at91_clk_periph_setup(struct device_node *np, u8 type)
  5376. {
  5377. int num;
  5378. u32 id;
  5379. @@ -364,6 +375,7 @@
  5380. const char *parent_name;
  5381. const char *name;
  5382. struct device_node *periphclknp;
  5383. + struct regmap *regmap;
  5384. parent_name = of_clk_get_parent_name(np, 0);
  5385. if (!parent_name)
  5386. @@ -373,6 +385,10 @@
  5387. if (!num || num > PERIPHERAL_MAX)
  5388. return;
  5389. + regmap = syscon_node_to_regmap(of_get_parent(np));
  5390. + if (IS_ERR(regmap))
  5391. + return;
  5392. +
  5393. for_each_child_of_node(np, periphclknp) {
  5394. if (of_property_read_u32(periphclknp, "reg", &id))
  5395. continue;
  5396. @@ -384,7 +400,7 @@
  5397. name = periphclknp->name;
  5398. if (type == PERIPHERAL_AT91RM9200) {
  5399. - clk = at91_clk_register_peripheral(pmc, name,
  5400. + clk = at91_clk_register_peripheral(regmap, name,
  5401. parent_name, id);
  5402. } else {
  5403. struct clk_range range = CLK_RANGE(0, 0);
  5404. @@ -393,7 +409,9 @@
  5405. "atmel,clk-output-range",
  5406. &range);
  5407. - clk = at91_clk_register_sam9x5_peripheral(pmc, name,
  5408. + clk = at91_clk_register_sam9x5_peripheral(regmap,
  5409. + &pmc_pcr_lock,
  5410. + name,
  5411. parent_name,
  5412. id, &range);
  5413. }
  5414. @@ -405,14 +423,16 @@
  5415. }
  5416. }
  5417. -void __init of_at91rm9200_clk_periph_setup(struct device_node *np,
  5418. - struct at91_pmc *pmc)
  5419. +static void __init of_at91rm9200_clk_periph_setup(struct device_node *np)
  5420. {
  5421. - of_at91_clk_periph_setup(np, pmc, PERIPHERAL_AT91RM9200);
  5422. + of_at91_clk_periph_setup(np, PERIPHERAL_AT91RM9200);
  5423. }
  5424. +CLK_OF_DECLARE(at91rm9200_clk_periph, "atmel,at91rm9200-clk-peripheral",
  5425. + of_at91rm9200_clk_periph_setup);
  5426. -void __init of_at91sam9x5_clk_periph_setup(struct device_node *np,
  5427. - struct at91_pmc *pmc)
  5428. +static void __init of_at91sam9x5_clk_periph_setup(struct device_node *np)
  5429. {
  5430. - of_at91_clk_periph_setup(np, pmc, PERIPHERAL_AT91SAM9X5);
  5431. + of_at91_clk_periph_setup(np, PERIPHERAL_AT91SAM9X5);
  5432. }
  5433. +CLK_OF_DECLARE(at91sam9x5_clk_periph, "atmel,at91sam9x5-clk-peripheral",
  5434. + of_at91sam9x5_clk_periph_setup);
  5435. diff -Nur linux-4.4.62.orig/drivers/clk/at91/clk-pll.c linux-4.4.62/drivers/clk/at91/clk-pll.c
  5436. --- linux-4.4.62.orig/drivers/clk/at91/clk-pll.c 2017-04-18 07:15:37.000000000 +0200
  5437. +++ linux-4.4.62/drivers/clk/at91/clk-pll.c 2017-04-18 17:38:08.042643350 +0200
  5438. @@ -12,14 +12,8 @@
  5439. #include <linux/clkdev.h>
  5440. #include <linux/clk/at91_pmc.h>
  5441. #include <linux/of.h>
  5442. -#include <linux/of_address.h>
  5443. -#include <linux/of_irq.h>
  5444. -#include <linux/io.h>
  5445. -#include <linux/kernel.h>
  5446. -#include <linux/wait.h>
  5447. -#include <linux/sched.h>
  5448. -#include <linux/interrupt.h>
  5449. -#include <linux/irq.h>
  5450. +#include <linux/mfd/syscon.h>
  5451. +#include <linux/regmap.h>
  5452. #include "pmc.h"
  5453. @@ -58,9 +52,7 @@
  5454. struct clk_pll {
  5455. struct clk_hw hw;
  5456. - struct at91_pmc *pmc;
  5457. - unsigned int irq;
  5458. - wait_queue_head_t wait;
  5459. + struct regmap *regmap;
  5460. u8 id;
  5461. u8 div;
  5462. u8 range;
  5463. @@ -69,20 +61,19 @@
  5464. const struct clk_pll_characteristics *characteristics;
  5465. };
  5466. -static irqreturn_t clk_pll_irq_handler(int irq, void *dev_id)
  5467. +static inline bool clk_pll_ready(struct regmap *regmap, int id)
  5468. {
  5469. - struct clk_pll *pll = (struct clk_pll *)dev_id;
  5470. + unsigned int status;
  5471. - wake_up(&pll->wait);
  5472. - disable_irq_nosync(pll->irq);
  5473. + regmap_read(regmap, AT91_PMC_SR, &status);
  5474. - return IRQ_HANDLED;
  5475. + return status & PLL_STATUS_MASK(id) ? 1 : 0;
  5476. }
  5477. static int clk_pll_prepare(struct clk_hw *hw)
  5478. {
  5479. struct clk_pll *pll = to_clk_pll(hw);
  5480. - struct at91_pmc *pmc = pll->pmc;
  5481. + struct regmap *regmap = pll->regmap;
  5482. const struct clk_pll_layout *layout = pll->layout;
  5483. const struct clk_pll_characteristics *characteristics =
  5484. pll->characteristics;
  5485. @@ -90,39 +81,34 @@
  5486. u32 mask = PLL_STATUS_MASK(id);
  5487. int offset = PLL_REG(id);
  5488. u8 out = 0;
  5489. - u32 pllr, icpr;
  5490. + unsigned int pllr;
  5491. + unsigned int status;
  5492. u8 div;
  5493. u16 mul;
  5494. - pllr = pmc_read(pmc, offset);
  5495. + regmap_read(regmap, offset, &pllr);
  5496. div = PLL_DIV(pllr);
  5497. mul = PLL_MUL(pllr, layout);
  5498. - if ((pmc_read(pmc, AT91_PMC_SR) & mask) &&
  5499. + regmap_read(regmap, AT91_PMC_SR, &status);
  5500. + if ((status & mask) &&
  5501. (div == pll->div && mul == pll->mul))
  5502. return 0;
  5503. if (characteristics->out)
  5504. out = characteristics->out[pll->range];
  5505. - if (characteristics->icpll) {
  5506. - icpr = pmc_read(pmc, AT91_PMC_PLLICPR) & ~PLL_ICPR_MASK(id);
  5507. - icpr |= (characteristics->icpll[pll->range] <<
  5508. - PLL_ICPR_SHIFT(id));
  5509. - pmc_write(pmc, AT91_PMC_PLLICPR, icpr);
  5510. - }
  5511. - pllr &= ~layout->pllr_mask;
  5512. - pllr |= layout->pllr_mask &
  5513. - (pll->div | (PLL_MAX_COUNT << PLL_COUNT_SHIFT) |
  5514. - (out << PLL_OUT_SHIFT) |
  5515. - ((pll->mul & layout->mul_mask) << layout->mul_shift));
  5516. - pmc_write(pmc, offset, pllr);
  5517. -
  5518. - while (!(pmc_read(pmc, AT91_PMC_SR) & mask)) {
  5519. - enable_irq(pll->irq);
  5520. - wait_event(pll->wait,
  5521. - pmc_read(pmc, AT91_PMC_SR) & mask);
  5522. - }
  5523. + if (characteristics->icpll)
  5524. + regmap_update_bits(regmap, AT91_PMC_PLLICPR, PLL_ICPR_MASK(id),
  5525. + characteristics->icpll[pll->range] << PLL_ICPR_SHIFT(id));
  5526. +
  5527. + regmap_update_bits(regmap, offset, layout->pllr_mask,
  5528. + pll->div | (PLL_MAX_COUNT << PLL_COUNT_SHIFT) |
  5529. + (out << PLL_OUT_SHIFT) |
  5530. + ((pll->mul & layout->mul_mask) << layout->mul_shift));
  5531. +
  5532. + while (!clk_pll_ready(regmap, pll->id))
  5533. + cpu_relax();
  5534. return 0;
  5535. }
  5536. @@ -130,32 +116,35 @@
  5537. static int clk_pll_is_prepared(struct clk_hw *hw)
  5538. {
  5539. struct clk_pll *pll = to_clk_pll(hw);
  5540. - struct at91_pmc *pmc = pll->pmc;
  5541. - return !!(pmc_read(pmc, AT91_PMC_SR) &
  5542. - PLL_STATUS_MASK(pll->id));
  5543. + return clk_pll_ready(pll->regmap, pll->id);
  5544. }
  5545. static void clk_pll_unprepare(struct clk_hw *hw)
  5546. {
  5547. struct clk_pll *pll = to_clk_pll(hw);
  5548. - struct at91_pmc *pmc = pll->pmc;
  5549. - const struct clk_pll_layout *layout = pll->layout;
  5550. - int offset = PLL_REG(pll->id);
  5551. - u32 tmp = pmc_read(pmc, offset) & ~(layout->pllr_mask);
  5552. + unsigned int mask = pll->layout->pllr_mask;
  5553. - pmc_write(pmc, offset, tmp);
  5554. + regmap_update_bits(pll->regmap, PLL_REG(pll->id), mask, ~mask);
  5555. }
  5556. static unsigned long clk_pll_recalc_rate(struct clk_hw *hw,
  5557. unsigned long parent_rate)
  5558. {
  5559. struct clk_pll *pll = to_clk_pll(hw);
  5560. + unsigned int pllr;
  5561. + u16 mul;
  5562. + u8 div;
  5563. +
  5564. + regmap_read(pll->regmap, PLL_REG(pll->id), &pllr);
  5565. +
  5566. + div = PLL_DIV(pllr);
  5567. + mul = PLL_MUL(pllr, pll->layout);
  5568. - if (!pll->div || !pll->mul)
  5569. + if (!div || !mul)
  5570. return 0;
  5571. - return (parent_rate / pll->div) * (pll->mul + 1);
  5572. + return (parent_rate / div) * (mul + 1);
  5573. }
  5574. static long clk_pll_get_best_div_mul(struct clk_pll *pll, unsigned long rate,
  5575. @@ -308,7 +297,7 @@
  5576. };
  5577. static struct clk * __init
  5578. -at91_clk_register_pll(struct at91_pmc *pmc, unsigned int irq, const char *name,
  5579. +at91_clk_register_pll(struct regmap *regmap, const char *name,
  5580. const char *parent_name, u8 id,
  5581. const struct clk_pll_layout *layout,
  5582. const struct clk_pll_characteristics *characteristics)
  5583. @@ -316,9 +305,8 @@
  5584. struct clk_pll *pll;
  5585. struct clk *clk = NULL;
  5586. struct clk_init_data init;
  5587. - int ret;
  5588. int offset = PLL_REG(id);
  5589. - u32 tmp;
  5590. + unsigned int pllr;
  5591. if (id > PLL_MAX_ID)
  5592. return ERR_PTR(-EINVAL);
  5593. @@ -337,23 +325,13 @@
  5594. pll->hw.init = &init;
  5595. pll->layout = layout;
  5596. pll->characteristics = characteristics;
  5597. - pll->pmc = pmc;
  5598. - pll->irq = irq;
  5599. - tmp = pmc_read(pmc, offset) & layout->pllr_mask;
  5600. - pll->div = PLL_DIV(tmp);
  5601. - pll->mul = PLL_MUL(tmp, layout);
  5602. - init_waitqueue_head(&pll->wait);
  5603. - irq_set_status_flags(pll->irq, IRQ_NOAUTOEN);
  5604. - ret = request_irq(pll->irq, clk_pll_irq_handler, IRQF_TRIGGER_HIGH,
  5605. - id ? "clk-pllb" : "clk-plla", pll);
  5606. - if (ret) {
  5607. - kfree(pll);
  5608. - return ERR_PTR(ret);
  5609. - }
  5610. + pll->regmap = regmap;
  5611. + regmap_read(regmap, offset, &pllr);
  5612. + pll->div = PLL_DIV(pllr);
  5613. + pll->mul = PLL_MUL(pllr, layout);
  5614. clk = clk_register(NULL, &pll->hw);
  5615. if (IS_ERR(clk)) {
  5616. - free_irq(pll->irq, pll);
  5617. kfree(pll);
  5618. }
  5619. @@ -483,12 +461,12 @@
  5620. }
  5621. static void __init
  5622. -of_at91_clk_pll_setup(struct device_node *np, struct at91_pmc *pmc,
  5623. +of_at91_clk_pll_setup(struct device_node *np,
  5624. const struct clk_pll_layout *layout)
  5625. {
  5626. u32 id;
  5627. - unsigned int irq;
  5628. struct clk *clk;
  5629. + struct regmap *regmap;
  5630. const char *parent_name;
  5631. const char *name = np->name;
  5632. struct clk_pll_characteristics *characteristics;
  5633. @@ -500,15 +478,15 @@
  5634. of_property_read_string(np, "clock-output-names", &name);
  5635. - characteristics = of_at91_clk_pll_get_characteristics(np);
  5636. - if (!characteristics)
  5637. + regmap = syscon_node_to_regmap(of_get_parent(np));
  5638. + if (IS_ERR(regmap))
  5639. return;
  5640. - irq = irq_of_parse_and_map(np, 0);
  5641. - if (!irq)
  5642. + characteristics = of_at91_clk_pll_get_characteristics(np);
  5643. + if (!characteristics)
  5644. return;
  5645. - clk = at91_clk_register_pll(pmc, irq, name, parent_name, id, layout,
  5646. + clk = at91_clk_register_pll(regmap, name, parent_name, id, layout,
  5647. characteristics);
  5648. if (IS_ERR(clk))
  5649. goto out_free_characteristics;
  5650. @@ -520,26 +498,30 @@
  5651. kfree(characteristics);
  5652. }
  5653. -void __init of_at91rm9200_clk_pll_setup(struct device_node *np,
  5654. - struct at91_pmc *pmc)
  5655. +static void __init of_at91rm9200_clk_pll_setup(struct device_node *np)
  5656. {
  5657. - of_at91_clk_pll_setup(np, pmc, &at91rm9200_pll_layout);
  5658. + of_at91_clk_pll_setup(np, &at91rm9200_pll_layout);
  5659. }
  5660. +CLK_OF_DECLARE(at91rm9200_clk_pll, "atmel,at91rm9200-clk-pll",
  5661. + of_at91rm9200_clk_pll_setup);
  5662. -void __init of_at91sam9g45_clk_pll_setup(struct device_node *np,
  5663. - struct at91_pmc *pmc)
  5664. +static void __init of_at91sam9g45_clk_pll_setup(struct device_node *np)
  5665. {
  5666. - of_at91_clk_pll_setup(np, pmc, &at91sam9g45_pll_layout);
  5667. + of_at91_clk_pll_setup(np, &at91sam9g45_pll_layout);
  5668. }
  5669. +CLK_OF_DECLARE(at91sam9g45_clk_pll, "atmel,at91sam9g45-clk-pll",
  5670. + of_at91sam9g45_clk_pll_setup);
  5671. -void __init of_at91sam9g20_clk_pllb_setup(struct device_node *np,
  5672. - struct at91_pmc *pmc)
  5673. +static void __init of_at91sam9g20_clk_pllb_setup(struct device_node *np)
  5674. {
  5675. - of_at91_clk_pll_setup(np, pmc, &at91sam9g20_pllb_layout);
  5676. + of_at91_clk_pll_setup(np, &at91sam9g20_pllb_layout);
  5677. }
  5678. +CLK_OF_DECLARE(at91sam9g20_clk_pllb, "atmel,at91sam9g20-clk-pllb",
  5679. + of_at91sam9g20_clk_pllb_setup);
  5680. -void __init of_sama5d3_clk_pll_setup(struct device_node *np,
  5681. - struct at91_pmc *pmc)
  5682. +static void __init of_sama5d3_clk_pll_setup(struct device_node *np)
  5683. {
  5684. - of_at91_clk_pll_setup(np, pmc, &sama5d3_pll_layout);
  5685. + of_at91_clk_pll_setup(np, &sama5d3_pll_layout);
  5686. }
  5687. +CLK_OF_DECLARE(sama5d3_clk_pll, "atmel,sama5d3-clk-pll",
  5688. + of_sama5d3_clk_pll_setup);
  5689. diff -Nur linux-4.4.62.orig/drivers/clk/at91/clk-plldiv.c linux-4.4.62/drivers/clk/at91/clk-plldiv.c
  5690. --- linux-4.4.62.orig/drivers/clk/at91/clk-plldiv.c 2017-04-18 07:15:37.000000000 +0200
  5691. +++ linux-4.4.62/drivers/clk/at91/clk-plldiv.c 2017-04-18 17:38:08.042643350 +0200
  5692. @@ -12,8 +12,8 @@
  5693. #include <linux/clkdev.h>
  5694. #include <linux/clk/at91_pmc.h>
  5695. #include <linux/of.h>
  5696. -#include <linux/of_address.h>
  5697. -#include <linux/io.h>
  5698. +#include <linux/mfd/syscon.h>
  5699. +#include <linux/regmap.h>
  5700. #include "pmc.h"
  5701. @@ -21,16 +21,18 @@
  5702. struct clk_plldiv {
  5703. struct clk_hw hw;
  5704. - struct at91_pmc *pmc;
  5705. + struct regmap *regmap;
  5706. };
  5707. static unsigned long clk_plldiv_recalc_rate(struct clk_hw *hw,
  5708. unsigned long parent_rate)
  5709. {
  5710. struct clk_plldiv *plldiv = to_clk_plldiv(hw);
  5711. - struct at91_pmc *pmc = plldiv->pmc;
  5712. + unsigned int mckr;
  5713. - if (pmc_read(pmc, AT91_PMC_MCKR) & AT91_PMC_PLLADIV2)
  5714. + regmap_read(plldiv->regmap, AT91_PMC_MCKR, &mckr);
  5715. +
  5716. + if (mckr & AT91_PMC_PLLADIV2)
  5717. return parent_rate / 2;
  5718. return parent_rate;
  5719. @@ -57,18 +59,12 @@
  5720. unsigned long parent_rate)
  5721. {
  5722. struct clk_plldiv *plldiv = to_clk_plldiv(hw);
  5723. - struct at91_pmc *pmc = plldiv->pmc;
  5724. - u32 tmp;
  5725. - if (parent_rate != rate && (parent_rate / 2) != rate)
  5726. + if ((parent_rate != rate) && (parent_rate / 2 != rate))
  5727. return -EINVAL;
  5728. - pmc_lock(pmc);
  5729. - tmp = pmc_read(pmc, AT91_PMC_MCKR) & ~AT91_PMC_PLLADIV2;
  5730. - if ((parent_rate / 2) == rate)
  5731. - tmp |= AT91_PMC_PLLADIV2;
  5732. - pmc_write(pmc, AT91_PMC_MCKR, tmp);
  5733. - pmc_unlock(pmc);
  5734. + regmap_update_bits(plldiv->regmap, AT91_PMC_MCKR, AT91_PMC_PLLADIV2,
  5735. + parent_rate != rate ? AT91_PMC_PLLADIV2 : 0);
  5736. return 0;
  5737. }
  5738. @@ -80,7 +76,7 @@
  5739. };
  5740. static struct clk * __init
  5741. -at91_clk_register_plldiv(struct at91_pmc *pmc, const char *name,
  5742. +at91_clk_register_plldiv(struct regmap *regmap, const char *name,
  5743. const char *parent_name)
  5744. {
  5745. struct clk_plldiv *plldiv;
  5746. @@ -98,7 +94,7 @@
  5747. init.flags = CLK_SET_RATE_GATE;
  5748. plldiv->hw.init = &init;
  5749. - plldiv->pmc = pmc;
  5750. + plldiv->regmap = regmap;
  5751. clk = clk_register(NULL, &plldiv->hw);
  5752. @@ -109,27 +105,27 @@
  5753. }
  5754. static void __init
  5755. -of_at91_clk_plldiv_setup(struct device_node *np, struct at91_pmc *pmc)
  5756. +of_at91sam9x5_clk_plldiv_setup(struct device_node *np)
  5757. {
  5758. struct clk *clk;
  5759. const char *parent_name;
  5760. const char *name = np->name;
  5761. + struct regmap *regmap;
  5762. parent_name = of_clk_get_parent_name(np, 0);
  5763. of_property_read_string(np, "clock-output-names", &name);
  5764. - clk = at91_clk_register_plldiv(pmc, name, parent_name);
  5765. + regmap = syscon_node_to_regmap(of_get_parent(np));
  5766. + if (IS_ERR(regmap))
  5767. + return;
  5768. + clk = at91_clk_register_plldiv(regmap, name, parent_name);
  5769. if (IS_ERR(clk))
  5770. return;
  5771. of_clk_add_provider(np, of_clk_src_simple_get, clk);
  5772. return;
  5773. }
  5774. -
  5775. -void __init of_at91sam9x5_clk_plldiv_setup(struct device_node *np,
  5776. - struct at91_pmc *pmc)
  5777. -{
  5778. - of_at91_clk_plldiv_setup(np, pmc);
  5779. -}
  5780. +CLK_OF_DECLARE(at91sam9x5_clk_plldiv, "atmel,at91sam9x5-clk-plldiv",
  5781. + of_at91sam9x5_clk_plldiv_setup);
  5782. diff -Nur linux-4.4.62.orig/drivers/clk/at91/clk-programmable.c linux-4.4.62/drivers/clk/at91/clk-programmable.c
  5783. --- linux-4.4.62.orig/drivers/clk/at91/clk-programmable.c 2017-04-18 07:15:37.000000000 +0200
  5784. +++ linux-4.4.62/drivers/clk/at91/clk-programmable.c 2017-04-18 17:38:08.046643506 +0200
  5785. @@ -12,10 +12,8 @@
  5786. #include <linux/clkdev.h>
  5787. #include <linux/clk/at91_pmc.h>
  5788. #include <linux/of.h>
  5789. -#include <linux/of_address.h>
  5790. -#include <linux/io.h>
  5791. -#include <linux/wait.h>
  5792. -#include <linux/sched.h>
  5793. +#include <linux/mfd/syscon.h>
  5794. +#include <linux/regmap.h>
  5795. #include "pmc.h"
  5796. @@ -24,6 +22,7 @@
  5797. #define PROG_STATUS_MASK(id) (1 << ((id) + 8))
  5798. #define PROG_PRES_MASK 0x7
  5799. +#define PROG_PRES(layout, pckr) ((pckr >> layout->pres_shift) & PROG_PRES_MASK)
  5800. #define PROG_MAX_RM9200_CSS 3
  5801. struct clk_programmable_layout {
  5802. @@ -34,7 +33,7 @@
  5803. struct clk_programmable {
  5804. struct clk_hw hw;
  5805. - struct at91_pmc *pmc;
  5806. + struct regmap *regmap;
  5807. u8 id;
  5808. const struct clk_programmable_layout *layout;
  5809. };
  5810. @@ -44,14 +43,12 @@
  5811. static unsigned long clk_programmable_recalc_rate(struct clk_hw *hw,
  5812. unsigned long parent_rate)
  5813. {
  5814. - u32 pres;
  5815. struct clk_programmable *prog = to_clk_programmable(hw);
  5816. - struct at91_pmc *pmc = prog->pmc;
  5817. - const struct clk_programmable_layout *layout = prog->layout;
  5818. + unsigned int pckr;
  5819. +
  5820. + regmap_read(prog->regmap, AT91_PMC_PCKR(prog->id), &pckr);
  5821. - pres = (pmc_read(pmc, AT91_PMC_PCKR(prog->id)) >> layout->pres_shift) &
  5822. - PROG_PRES_MASK;
  5823. - return parent_rate >> pres;
  5824. + return parent_rate >> PROG_PRES(prog->layout, pckr);
  5825. }
  5826. static int clk_programmable_determine_rate(struct clk_hw *hw,
  5827. @@ -101,36 +98,36 @@
  5828. {
  5829. struct clk_programmable *prog = to_clk_programmable(hw);
  5830. const struct clk_programmable_layout *layout = prog->layout;
  5831. - struct at91_pmc *pmc = prog->pmc;
  5832. - u32 tmp = pmc_read(pmc, AT91_PMC_PCKR(prog->id)) & ~layout->css_mask;
  5833. + unsigned int mask = layout->css_mask;
  5834. + unsigned int pckr = 0;
  5835. if (layout->have_slck_mck)
  5836. - tmp &= AT91_PMC_CSSMCK_MCK;
  5837. + mask |= AT91_PMC_CSSMCK_MCK;
  5838. if (index > layout->css_mask) {
  5839. - if (index > PROG_MAX_RM9200_CSS && layout->have_slck_mck) {
  5840. - tmp |= AT91_PMC_CSSMCK_MCK;
  5841. - return 0;
  5842. - } else {
  5843. + if (index > PROG_MAX_RM9200_CSS && !layout->have_slck_mck)
  5844. return -EINVAL;
  5845. - }
  5846. +
  5847. + pckr |= AT91_PMC_CSSMCK_MCK;
  5848. }
  5849. - pmc_write(pmc, AT91_PMC_PCKR(prog->id), tmp | index);
  5850. + regmap_update_bits(prog->regmap, AT91_PMC_PCKR(prog->id), mask, pckr);
  5851. +
  5852. return 0;
  5853. }
  5854. static u8 clk_programmable_get_parent(struct clk_hw *hw)
  5855. {
  5856. - u32 tmp;
  5857. - u8 ret;
  5858. struct clk_programmable *prog = to_clk_programmable(hw);
  5859. - struct at91_pmc *pmc = prog->pmc;
  5860. const struct clk_programmable_layout *layout = prog->layout;
  5861. + unsigned int pckr;
  5862. + u8 ret;
  5863. +
  5864. + regmap_read(prog->regmap, AT91_PMC_PCKR(prog->id), &pckr);
  5865. +
  5866. + ret = pckr & layout->css_mask;
  5867. - tmp = pmc_read(pmc, AT91_PMC_PCKR(prog->id));
  5868. - ret = tmp & layout->css_mask;
  5869. - if (layout->have_slck_mck && (tmp & AT91_PMC_CSSMCK_MCK) && !ret)
  5870. + if (layout->have_slck_mck && (pckr & AT91_PMC_CSSMCK_MCK) && !ret)
  5871. ret = PROG_MAX_RM9200_CSS + 1;
  5872. return ret;
  5873. @@ -140,26 +137,27 @@
  5874. unsigned long parent_rate)
  5875. {
  5876. struct clk_programmable *prog = to_clk_programmable(hw);
  5877. - struct at91_pmc *pmc = prog->pmc;
  5878. const struct clk_programmable_layout *layout = prog->layout;
  5879. unsigned long div = parent_rate / rate;
  5880. + unsigned int pckr;
  5881. int shift = 0;
  5882. - u32 tmp = pmc_read(pmc, AT91_PMC_PCKR(prog->id)) &
  5883. - ~(PROG_PRES_MASK << layout->pres_shift);
  5884. +
  5885. + regmap_read(prog->regmap, AT91_PMC_PCKR(prog->id), &pckr);
  5886. if (!div)
  5887. return -EINVAL;
  5888. shift = fls(div) - 1;
  5889. - if (div != (1<<shift))
  5890. + if (div != (1 << shift))
  5891. return -EINVAL;
  5892. if (shift >= PROG_PRES_MASK)
  5893. return -EINVAL;
  5894. - pmc_write(pmc, AT91_PMC_PCKR(prog->id),
  5895. - tmp | (shift << layout->pres_shift));
  5896. + regmap_update_bits(prog->regmap, AT91_PMC_PCKR(prog->id),
  5897. + PROG_PRES_MASK << layout->pres_shift,
  5898. + shift << layout->pres_shift);
  5899. return 0;
  5900. }
  5901. @@ -173,7 +171,7 @@
  5902. };
  5903. static struct clk * __init
  5904. -at91_clk_register_programmable(struct at91_pmc *pmc,
  5905. +at91_clk_register_programmable(struct regmap *regmap,
  5906. const char *name, const char **parent_names,
  5907. u8 num_parents, u8 id,
  5908. const struct clk_programmable_layout *layout)
  5909. @@ -198,7 +196,7 @@
  5910. prog->id = id;
  5911. prog->layout = layout;
  5912. prog->hw.init = &init;
  5913. - prog->pmc = pmc;
  5914. + prog->regmap = regmap;
  5915. clk = clk_register(NULL, &prog->hw);
  5916. if (IS_ERR(clk))
  5917. @@ -226,7 +224,7 @@
  5918. };
  5919. static void __init
  5920. -of_at91_clk_prog_setup(struct device_node *np, struct at91_pmc *pmc,
  5921. +of_at91_clk_prog_setup(struct device_node *np,
  5922. const struct clk_programmable_layout *layout)
  5923. {
  5924. int num;
  5925. @@ -236,6 +234,7 @@
  5926. const char *parent_names[PROG_SOURCE_MAX];
  5927. const char *name;
  5928. struct device_node *progclknp;
  5929. + struct regmap *regmap;
  5930. num_parents = of_clk_get_parent_count(np);
  5931. if (num_parents <= 0 || num_parents > PROG_SOURCE_MAX)
  5932. @@ -247,6 +246,10 @@
  5933. if (!num || num > (PROG_ID_MAX + 1))
  5934. return;
  5935. + regmap = syscon_node_to_regmap(of_get_parent(np));
  5936. + if (IS_ERR(regmap))
  5937. + return;
  5938. +
  5939. for_each_child_of_node(np, progclknp) {
  5940. if (of_property_read_u32(progclknp, "reg", &id))
  5941. continue;
  5942. @@ -254,7 +257,7 @@
  5943. if (of_property_read_string(np, "clock-output-names", &name))
  5944. name = progclknp->name;
  5945. - clk = at91_clk_register_programmable(pmc, name,
  5946. + clk = at91_clk_register_programmable(regmap, name,
  5947. parent_names, num_parents,
  5948. id, layout);
  5949. if (IS_ERR(clk))
  5950. @@ -265,20 +268,23 @@
  5951. }
  5952. -void __init of_at91rm9200_clk_prog_setup(struct device_node *np,
  5953. - struct at91_pmc *pmc)
  5954. +static void __init of_at91rm9200_clk_prog_setup(struct device_node *np)
  5955. {
  5956. - of_at91_clk_prog_setup(np, pmc, &at91rm9200_programmable_layout);
  5957. + of_at91_clk_prog_setup(np, &at91rm9200_programmable_layout);
  5958. }
  5959. +CLK_OF_DECLARE(at91rm9200_clk_prog, "atmel,at91rm9200-clk-programmable",
  5960. + of_at91rm9200_clk_prog_setup);
  5961. -void __init of_at91sam9g45_clk_prog_setup(struct device_node *np,
  5962. - struct at91_pmc *pmc)
  5963. +static void __init of_at91sam9g45_clk_prog_setup(struct device_node *np)
  5964. {
  5965. - of_at91_clk_prog_setup(np, pmc, &at91sam9g45_programmable_layout);
  5966. + of_at91_clk_prog_setup(np, &at91sam9g45_programmable_layout);
  5967. }
  5968. +CLK_OF_DECLARE(at91sam9g45_clk_prog, "atmel,at91sam9g45-clk-programmable",
  5969. + of_at91sam9g45_clk_prog_setup);
  5970. -void __init of_at91sam9x5_clk_prog_setup(struct device_node *np,
  5971. - struct at91_pmc *pmc)
  5972. +static void __init of_at91sam9x5_clk_prog_setup(struct device_node *np)
  5973. {
  5974. - of_at91_clk_prog_setup(np, pmc, &at91sam9x5_programmable_layout);
  5975. + of_at91_clk_prog_setup(np, &at91sam9x5_programmable_layout);
  5976. }
  5977. +CLK_OF_DECLARE(at91sam9x5_clk_prog, "atmel,at91sam9x5-clk-programmable",
  5978. + of_at91sam9x5_clk_prog_setup);
  5979. diff -Nur linux-4.4.62.orig/drivers/clk/at91/clk-slow.c linux-4.4.62/drivers/clk/at91/clk-slow.c
  5980. --- linux-4.4.62.orig/drivers/clk/at91/clk-slow.c 2017-04-18 07:15:37.000000000 +0200
  5981. +++ linux-4.4.62/drivers/clk/at91/clk-slow.c 2017-04-18 17:38:08.046643506 +0200
  5982. @@ -13,17 +13,11 @@
  5983. #include <linux/clk.h>
  5984. #include <linux/clk-provider.h>
  5985. #include <linux/clkdev.h>
  5986. -#include <linux/slab.h>
  5987. #include <linux/clk/at91_pmc.h>
  5988. #include <linux/delay.h>
  5989. #include <linux/of.h>
  5990. -#include <linux/of_address.h>
  5991. -#include <linux/of_irq.h>
  5992. -#include <linux/io.h>
  5993. -#include <linux/interrupt.h>
  5994. -#include <linux/irq.h>
  5995. -#include <linux/sched.h>
  5996. -#include <linux/wait.h>
  5997. +#include <linux/mfd/syscon.h>
  5998. +#include <linux/regmap.h>
  5999. #include "pmc.h"
  6000. #include "sckc.h"
  6001. @@ -59,7 +53,7 @@
  6002. struct clk_sam9260_slow {
  6003. struct clk_hw hw;
  6004. - struct at91_pmc *pmc;
  6005. + struct regmap *regmap;
  6006. };
  6007. #define to_clk_sam9260_slow(hw) container_of(hw, struct clk_sam9260_slow, hw)
  6008. @@ -393,8 +387,11 @@
  6009. static u8 clk_sam9260_slow_get_parent(struct clk_hw *hw)
  6010. {
  6011. struct clk_sam9260_slow *slowck = to_clk_sam9260_slow(hw);
  6012. + unsigned int status;
  6013. - return !!(pmc_read(slowck->pmc, AT91_PMC_SR) & AT91_PMC_OSCSEL);
  6014. + regmap_read(slowck->regmap, AT91_PMC_SR, &status);
  6015. +
  6016. + return status & AT91_PMC_OSCSEL ? 1 : 0;
  6017. }
  6018. static const struct clk_ops sam9260_slow_ops = {
  6019. @@ -402,7 +399,7 @@
  6020. };
  6021. static struct clk * __init
  6022. -at91_clk_register_sam9260_slow(struct at91_pmc *pmc,
  6023. +at91_clk_register_sam9260_slow(struct regmap *regmap,
  6024. const char *name,
  6025. const char **parent_names,
  6026. int num_parents)
  6027. @@ -411,7 +408,7 @@
  6028. struct clk *clk = NULL;
  6029. struct clk_init_data init;
  6030. - if (!pmc || !name)
  6031. + if (!name)
  6032. return ERR_PTR(-EINVAL);
  6033. if (!parent_names || !num_parents)
  6034. @@ -428,7 +425,7 @@
  6035. init.flags = 0;
  6036. slowck->hw.init = &init;
  6037. - slowck->pmc = pmc;
  6038. + slowck->regmap = regmap;
  6039. clk = clk_register(NULL, &slowck->hw);
  6040. if (IS_ERR(clk))
  6041. @@ -439,29 +436,34 @@
  6042. return clk;
  6043. }
  6044. -void __init of_at91sam9260_clk_slow_setup(struct device_node *np,
  6045. - struct at91_pmc *pmc)
  6046. +static void __init of_at91sam9260_clk_slow_setup(struct device_node *np)
  6047. {
  6048. struct clk *clk;
  6049. const char *parent_names[2];
  6050. int num_parents;
  6051. const char *name = np->name;
  6052. + struct regmap *regmap;
  6053. num_parents = of_clk_get_parent_count(np);
  6054. if (num_parents != 2)
  6055. return;
  6056. of_clk_parent_fill(np, parent_names, num_parents);
  6057. + regmap = syscon_node_to_regmap(of_get_parent(np));
  6058. + if (IS_ERR(regmap))
  6059. + return;
  6060. of_property_read_string(np, "clock-output-names", &name);
  6061. - clk = at91_clk_register_sam9260_slow(pmc, name, parent_names,
  6062. + clk = at91_clk_register_sam9260_slow(regmap, name, parent_names,
  6063. num_parents);
  6064. if (IS_ERR(clk))
  6065. return;
  6066. of_clk_add_provider(np, of_clk_src_simple_get, clk);
  6067. }
  6068. +CLK_OF_DECLARE(at91sam9260_clk_slow, "atmel,at91sam9260-clk-slow",
  6069. + of_at91sam9260_clk_slow_setup);
  6070. /*
  6071. * FIXME: All slow clk users are not properly claiming it (get + prepare +
  6072. diff -Nur linux-4.4.62.orig/drivers/clk/at91/clk-smd.c linux-4.4.62/drivers/clk/at91/clk-smd.c
  6073. --- linux-4.4.62.orig/drivers/clk/at91/clk-smd.c 2017-04-18 07:15:37.000000000 +0200
  6074. +++ linux-4.4.62/drivers/clk/at91/clk-smd.c 2017-04-18 17:38:08.046643506 +0200
  6075. @@ -12,8 +12,8 @@
  6076. #include <linux/clkdev.h>
  6077. #include <linux/clk/at91_pmc.h>
  6078. #include <linux/of.h>
  6079. -#include <linux/of_address.h>
  6080. -#include <linux/io.h>
  6081. +#include <linux/mfd/syscon.h>
  6082. +#include <linux/regmap.h>
  6083. #include "pmc.h"
  6084. @@ -24,7 +24,7 @@
  6085. struct at91sam9x5_clk_smd {
  6086. struct clk_hw hw;
  6087. - struct at91_pmc *pmc;
  6088. + struct regmap *regmap;
  6089. };
  6090. #define to_at91sam9x5_clk_smd(hw) \
  6091. @@ -33,13 +33,13 @@
  6092. static unsigned long at91sam9x5_clk_smd_recalc_rate(struct clk_hw *hw,
  6093. unsigned long parent_rate)
  6094. {
  6095. - u32 tmp;
  6096. - u8 smddiv;
  6097. struct at91sam9x5_clk_smd *smd = to_at91sam9x5_clk_smd(hw);
  6098. - struct at91_pmc *pmc = smd->pmc;
  6099. + unsigned int smdr;
  6100. + u8 smddiv;
  6101. +
  6102. + regmap_read(smd->regmap, AT91_PMC_SMD, &smdr);
  6103. + smddiv = (smdr & AT91_PMC_SMD_DIV) >> SMD_DIV_SHIFT;
  6104. - tmp = pmc_read(pmc, AT91_PMC_SMD);
  6105. - smddiv = (tmp & AT91_PMC_SMD_DIV) >> SMD_DIV_SHIFT;
  6106. return parent_rate / (smddiv + 1);
  6107. }
  6108. @@ -67,40 +67,38 @@
  6109. static int at91sam9x5_clk_smd_set_parent(struct clk_hw *hw, u8 index)
  6110. {
  6111. - u32 tmp;
  6112. struct at91sam9x5_clk_smd *smd = to_at91sam9x5_clk_smd(hw);
  6113. - struct at91_pmc *pmc = smd->pmc;
  6114. if (index > 1)
  6115. return -EINVAL;
  6116. - tmp = pmc_read(pmc, AT91_PMC_SMD) & ~AT91_PMC_SMDS;
  6117. - if (index)
  6118. - tmp |= AT91_PMC_SMDS;
  6119. - pmc_write(pmc, AT91_PMC_SMD, tmp);
  6120. +
  6121. + regmap_update_bits(smd->regmap, AT91_PMC_SMD, AT91_PMC_SMDS,
  6122. + index ? AT91_PMC_SMDS : 0);
  6123. +
  6124. return 0;
  6125. }
  6126. static u8 at91sam9x5_clk_smd_get_parent(struct clk_hw *hw)
  6127. {
  6128. struct at91sam9x5_clk_smd *smd = to_at91sam9x5_clk_smd(hw);
  6129. - struct at91_pmc *pmc = smd->pmc;
  6130. + unsigned int smdr;
  6131. - return pmc_read(pmc, AT91_PMC_SMD) & AT91_PMC_SMDS;
  6132. + regmap_read(smd->regmap, AT91_PMC_SMD, &smdr);
  6133. +
  6134. + return smdr & AT91_PMC_SMDS;
  6135. }
  6136. static int at91sam9x5_clk_smd_set_rate(struct clk_hw *hw, unsigned long rate,
  6137. unsigned long parent_rate)
  6138. {
  6139. - u32 tmp;
  6140. struct at91sam9x5_clk_smd *smd = to_at91sam9x5_clk_smd(hw);
  6141. - struct at91_pmc *pmc = smd->pmc;
  6142. unsigned long div = parent_rate / rate;
  6143. if (parent_rate % rate || div < 1 || div > (SMD_MAX_DIV + 1))
  6144. return -EINVAL;
  6145. - tmp = pmc_read(pmc, AT91_PMC_SMD) & ~AT91_PMC_SMD_DIV;
  6146. - tmp |= (div - 1) << SMD_DIV_SHIFT;
  6147. - pmc_write(pmc, AT91_PMC_SMD, tmp);
  6148. +
  6149. + regmap_update_bits(smd->regmap, AT91_PMC_SMD, AT91_PMC_SMD_DIV,
  6150. + (div - 1) << SMD_DIV_SHIFT);
  6151. return 0;
  6152. }
  6153. @@ -114,7 +112,7 @@
  6154. };
  6155. static struct clk * __init
  6156. -at91sam9x5_clk_register_smd(struct at91_pmc *pmc, const char *name,
  6157. +at91sam9x5_clk_register_smd(struct regmap *regmap, const char *name,
  6158. const char **parent_names, u8 num_parents)
  6159. {
  6160. struct at91sam9x5_clk_smd *smd;
  6161. @@ -132,7 +130,7 @@
  6162. init.flags = CLK_SET_RATE_GATE | CLK_SET_PARENT_GATE;
  6163. smd->hw.init = &init;
  6164. - smd->pmc = pmc;
  6165. + smd->regmap = regmap;
  6166. clk = clk_register(NULL, &smd->hw);
  6167. if (IS_ERR(clk))
  6168. @@ -141,13 +139,13 @@
  6169. return clk;
  6170. }
  6171. -void __init of_at91sam9x5_clk_smd_setup(struct device_node *np,
  6172. - struct at91_pmc *pmc)
  6173. +static void __init of_at91sam9x5_clk_smd_setup(struct device_node *np)
  6174. {
  6175. struct clk *clk;
  6176. int num_parents;
  6177. const char *parent_names[SMD_SOURCE_MAX];
  6178. const char *name = np->name;
  6179. + struct regmap *regmap;
  6180. num_parents = of_clk_get_parent_count(np);
  6181. if (num_parents <= 0 || num_parents > SMD_SOURCE_MAX)
  6182. @@ -157,10 +155,16 @@
  6183. of_property_read_string(np, "clock-output-names", &name);
  6184. - clk = at91sam9x5_clk_register_smd(pmc, name, parent_names,
  6185. + regmap = syscon_node_to_regmap(of_get_parent(np));
  6186. + if (IS_ERR(regmap))
  6187. + return;
  6188. +
  6189. + clk = at91sam9x5_clk_register_smd(regmap, name, parent_names,
  6190. num_parents);
  6191. if (IS_ERR(clk))
  6192. return;
  6193. of_clk_add_provider(np, of_clk_src_simple_get, clk);
  6194. }
  6195. +CLK_OF_DECLARE(at91sam9x5_clk_smd, "atmel,at91sam9x5-clk-smd",
  6196. + of_at91sam9x5_clk_smd_setup);
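
The whole clk-smd.c conversion above follows one mechanical rule: every pmc_read()/pmc_write() read-modify-write sequence collapses into a single regmap_update_bits() call, and every status read becomes regmap_read() into an unsigned int. A minimal sketch of that rule in isolation (the EXAMPLE_* register offset and field are made up for illustration; regmap_read() and regmap_update_bits() are the real <linux/regmap.h> calls):

#include <linux/regmap.h>

#define EXAMPLE_REG	0x10		/* hypothetical register offset */
#define EXAMPLE_DIV	(0xf << 8)	/* hypothetical divider field */

/* old style: read, modify, write back through the private pmc_* helpers */
/*	tmp = pmc_read(pmc, EXAMPLE_REG) & ~EXAMPLE_DIV;	*/
/*	tmp |= (div - 1) << 8;					*/
/*	pmc_write(pmc, EXAMPLE_REG, tmp);			*/

/* new style: regmap performs the locked read-modify-write itself */
static int example_set_div(struct regmap *regmap, unsigned int div)
{
	return regmap_update_bits(regmap, EXAMPLE_REG, EXAMPLE_DIV,
				  (div - 1) << 8);
}

/* reads now go through an unsigned int out-parameter */
static unsigned int example_get_div(struct regmap *regmap)
{
	unsigned int val;

	regmap_read(regmap, EXAMPLE_REG, &val);
	return ((val & EXAMPLE_DIV) >> 8) + 1;
}

Because regmap serializes the access internally, the per-clock struct no longer needs a pointer to struct at91_pmc or its lock, only the regmap handle.
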
  6197. diff -Nur linux-4.4.62.orig/drivers/clk/at91/clk-system.c linux-4.4.62/drivers/clk/at91/clk-system.c
  6198. --- linux-4.4.62.orig/drivers/clk/at91/clk-system.c 2017-04-18 07:15:37.000000000 +0200
  6199. +++ linux-4.4.62/drivers/clk/at91/clk-system.c 2017-04-18 17:38:08.046643506 +0200
  6200. @@ -12,13 +12,8 @@
  6201. #include <linux/clkdev.h>
  6202. #include <linux/clk/at91_pmc.h>
  6203. #include <linux/of.h>
  6204. -#include <linux/of_address.h>
  6205. -#include <linux/io.h>
  6206. -#include <linux/irq.h>
  6207. -#include <linux/of_irq.h>
  6208. -#include <linux/interrupt.h>
  6209. -#include <linux/wait.h>
  6210. -#include <linux/sched.h>
  6211. +#include <linux/mfd/syscon.h>
  6212. +#include <linux/regmap.h>
  6213. #include "pmc.h"
  6214. @@ -29,9 +24,7 @@
  6215. #define to_clk_system(hw) container_of(hw, struct clk_system, hw)
  6216. struct clk_system {
  6217. struct clk_hw hw;
  6218. - struct at91_pmc *pmc;
  6219. - unsigned int irq;
  6220. - wait_queue_head_t wait;
  6221. + struct regmap *regmap;
  6222. u8 id;
  6223. };
  6224. @@ -39,58 +32,54 @@
  6225. {
  6226. return (id >= 8) && (id <= 15);
  6227. }
  6228. -static irqreturn_t clk_system_irq_handler(int irq, void *dev_id)
  6229. +
  6230. +static inline bool clk_system_ready(struct regmap *regmap, int id)
  6231. {
  6232. - struct clk_system *sys = (struct clk_system *)dev_id;
  6233. + unsigned int status;
  6234. - wake_up(&sys->wait);
  6235. - disable_irq_nosync(sys->irq);
  6236. + regmap_read(regmap, AT91_PMC_SR, &status);
  6237. - return IRQ_HANDLED;
  6238. + return status & (1 << id) ? 1 : 0;
  6239. }
  6240. static int clk_system_prepare(struct clk_hw *hw)
  6241. {
  6242. struct clk_system *sys = to_clk_system(hw);
  6243. - struct at91_pmc *pmc = sys->pmc;
  6244. - u32 mask = 1 << sys->id;
  6245. - pmc_write(pmc, AT91_PMC_SCER, mask);
  6246. + regmap_write(sys->regmap, AT91_PMC_SCER, 1 << sys->id);
  6247. if (!is_pck(sys->id))
  6248. return 0;
  6249. - while (!(pmc_read(pmc, AT91_PMC_SR) & mask)) {
  6250. - if (sys->irq) {
  6251. - enable_irq(sys->irq);
  6252. - wait_event(sys->wait,
  6253. - pmc_read(pmc, AT91_PMC_SR) & mask);
  6254. - } else
  6255. - cpu_relax();
  6256. - }
  6257. + while (!clk_system_ready(sys->regmap, sys->id))
  6258. + cpu_relax();
  6259. +
  6260. return 0;
  6261. }
  6262. static void clk_system_unprepare(struct clk_hw *hw)
  6263. {
  6264. struct clk_system *sys = to_clk_system(hw);
  6265. - struct at91_pmc *pmc = sys->pmc;
  6266. - pmc_write(pmc, AT91_PMC_SCDR, 1 << sys->id);
  6267. + regmap_write(sys->regmap, AT91_PMC_SCDR, 1 << sys->id);
  6268. }
  6269. static int clk_system_is_prepared(struct clk_hw *hw)
  6270. {
  6271. struct clk_system *sys = to_clk_system(hw);
  6272. - struct at91_pmc *pmc = sys->pmc;
  6273. + unsigned int status;
  6274. +
  6275. + regmap_read(sys->regmap, AT91_PMC_SCSR, &status);
  6276. - if (!(pmc_read(pmc, AT91_PMC_SCSR) & (1 << sys->id)))
  6277. + if (!(status & (1 << sys->id)))
  6278. return 0;
  6279. if (!is_pck(sys->id))
  6280. return 1;
  6281. - return !!(pmc_read(pmc, AT91_PMC_SR) & (1 << sys->id));
  6282. + regmap_read(sys->regmap, AT91_PMC_SR, &status);
  6283. +
  6284. + return status & (1 << sys->id) ? 1 : 0;
  6285. }
  6286. static const struct clk_ops system_ops = {
  6287. @@ -100,13 +89,12 @@
  6288. };
  6289. static struct clk * __init
  6290. -at91_clk_register_system(struct at91_pmc *pmc, const char *name,
  6291. - const char *parent_name, u8 id, int irq)
  6292. +at91_clk_register_system(struct regmap *regmap, const char *name,
  6293. + const char *parent_name, u8 id)
  6294. {
  6295. struct clk_system *sys;
  6296. struct clk *clk = NULL;
  6297. struct clk_init_data init;
  6298. - int ret;
  6299. if (!parent_name || id > SYSTEM_MAX_ID)
  6300. return ERR_PTR(-EINVAL);
  6301. @@ -123,44 +111,33 @@
  6302. sys->id = id;
  6303. sys->hw.init = &init;
  6304. - sys->pmc = pmc;
  6305. - sys->irq = irq;
  6306. - if (irq) {
  6307. - init_waitqueue_head(&sys->wait);
  6308. - irq_set_status_flags(sys->irq, IRQ_NOAUTOEN);
  6309. - ret = request_irq(sys->irq, clk_system_irq_handler,
  6310. - IRQF_TRIGGER_HIGH, name, sys);
  6311. - if (ret) {
  6312. - kfree(sys);
  6313. - return ERR_PTR(ret);
  6314. - }
  6315. - }
  6316. + sys->regmap = regmap;
  6317. clk = clk_register(NULL, &sys->hw);
  6318. - if (IS_ERR(clk)) {
  6319. - if (irq)
  6320. - free_irq(sys->irq, sys);
  6321. + if (IS_ERR(clk))
  6322. kfree(sys);
  6323. - }
  6324. return clk;
  6325. }
  6326. -static void __init
  6327. -of_at91_clk_sys_setup(struct device_node *np, struct at91_pmc *pmc)
  6328. +static void __init of_at91rm9200_clk_sys_setup(struct device_node *np)
  6329. {
  6330. int num;
  6331. - int irq = 0;
  6332. u32 id;
  6333. struct clk *clk;
  6334. const char *name;
  6335. struct device_node *sysclknp;
  6336. const char *parent_name;
  6337. + struct regmap *regmap;
  6338. num = of_get_child_count(np);
  6339. if (num > (SYSTEM_MAX_ID + 1))
  6340. return;
  6341. + regmap = syscon_node_to_regmap(of_get_parent(np));
  6342. + if (IS_ERR(regmap))
  6343. + return;
  6344. +
  6345. for_each_child_of_node(np, sysclknp) {
  6346. if (of_property_read_u32(sysclknp, "reg", &id))
  6347. continue;
  6348. @@ -168,21 +145,14 @@
  6349. if (of_property_read_string(np, "clock-output-names", &name))
  6350. name = sysclknp->name;
  6351. - if (is_pck(id))
  6352. - irq = irq_of_parse_and_map(sysclknp, 0);
  6353. -
  6354. parent_name = of_clk_get_parent_name(sysclknp, 0);
  6355. - clk = at91_clk_register_system(pmc, name, parent_name, id, irq);
  6356. + clk = at91_clk_register_system(regmap, name, parent_name, id);
  6357. if (IS_ERR(clk))
  6358. continue;
  6359. of_clk_add_provider(sysclknp, of_clk_src_simple_get, clk);
  6360. }
  6361. }
  6362. -
  6363. -void __init of_at91rm9200_clk_sys_setup(struct device_node *np,
  6364. - struct at91_pmc *pmc)
  6365. -{
  6366. - of_at91_clk_sys_setup(np, pmc);
  6367. -}
  6368. +CLK_OF_DECLARE(at91rm9200_clk_sys, "atmel,at91rm9200-clk-system",
  6369. + of_at91rm9200_clk_sys_setup);
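
Registration changes shape as well: instead of pmc.c walking its child nodes and handing each setup function a struct at91_pmc, every clock type now self-registers with CLK_OF_DECLARE() and fetches the PMC regmap from its parent node, which has become a syscon. A skeleton of that pattern (the "vendor,example-clk" compatible, the EXAMPLE_* register bits and the fixed-rate clock are stand-ins for a real driver; syscon_node_to_regmap(), CLK_OF_DECLARE() and the of_clk helpers are the real APIs):

#include <linux/bitops.h>
#include <linux/clk-provider.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/mfd/syscon.h>
#include <linux/of.h>
#include <linux/regmap.h>

#define EXAMPLE_STATUS_REG	0x08		/* hypothetical register */
#define EXAMPLE_FAST_BIT	BIT(0)		/* hypothetical status bit */

static void __init example_clk_setup(struct device_node *np)
{
	struct regmap *regmap;
	struct clk *clk;
	const char *name = np->name;
	unsigned int val;

	of_property_read_string(np, "clock-output-names", &name);

	/* the clock node sits below the PMC node, which is now a syscon */
	regmap = syscon_node_to_regmap(of_get_parent(np));
	if (IS_ERR(regmap))
		return;

	/* stand-in for the driver's real clock: pick a rate from a register */
	regmap_read(regmap, EXAMPLE_STATUS_REG, &val);
	clk = clk_register_fixed_rate(NULL, name, NULL, CLK_IS_ROOT,
				      (val & EXAMPLE_FAST_BIT) ?
				      48000000 : 32768);
	if (IS_ERR(clk))
		return;

	of_clk_add_provider(np, of_clk_src_simple_get, clk);
}
CLK_OF_DECLARE(example_clk, "vendor,example-clk", example_clk_setup);

Because CLK_OF_DECLARE() hooks into of_clk_init(), probing no longer depends on pmc.c, which is what lets all the of_*_setup() prototypes disappear from pmc.h further down.
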
  6370. diff -Nur linux-4.4.62.orig/drivers/clk/at91/clk-usb.c linux-4.4.62/drivers/clk/at91/clk-usb.c
  6371. --- linux-4.4.62.orig/drivers/clk/at91/clk-usb.c 2017-04-18 07:15:37.000000000 +0200
  6372. +++ linux-4.4.62/drivers/clk/at91/clk-usb.c 2017-04-18 17:38:08.046643506 +0200
  6373. @@ -12,8 +12,8 @@
  6374. #include <linux/clkdev.h>
  6375. #include <linux/clk/at91_pmc.h>
  6376. #include <linux/of.h>
  6377. -#include <linux/of_address.h>
  6378. -#include <linux/io.h>
  6379. +#include <linux/mfd/syscon.h>
  6380. +#include <linux/regmap.h>
  6381. #include "pmc.h"
  6382. @@ -27,7 +27,7 @@
  6383. struct at91sam9x5_clk_usb {
  6384. struct clk_hw hw;
  6385. - struct at91_pmc *pmc;
  6386. + struct regmap *regmap;
  6387. };
  6388. #define to_at91sam9x5_clk_usb(hw) \
  6389. @@ -35,7 +35,7 @@
  6390. struct at91rm9200_clk_usb {
  6391. struct clk_hw hw;
  6392. - struct at91_pmc *pmc;
  6393. + struct regmap *regmap;
  6394. u32 divisors[4];
  6395. };
  6396. @@ -45,13 +45,12 @@
  6397. static unsigned long at91sam9x5_clk_usb_recalc_rate(struct clk_hw *hw,
  6398. unsigned long parent_rate)
  6399. {
  6400. - u32 tmp;
  6401. - u8 usbdiv;
  6402. struct at91sam9x5_clk_usb *usb = to_at91sam9x5_clk_usb(hw);
  6403. - struct at91_pmc *pmc = usb->pmc;
  6404. + unsigned int usbr;
  6405. + u8 usbdiv;
  6406. - tmp = pmc_read(pmc, AT91_PMC_USB);
  6407. - usbdiv = (tmp & AT91_PMC_OHCIUSBDIV) >> SAM9X5_USB_DIV_SHIFT;
  6408. + regmap_read(usb->regmap, AT91_PMC_USB, &usbr);
  6409. + usbdiv = (usbr & AT91_PMC_OHCIUSBDIV) >> SAM9X5_USB_DIV_SHIFT;
  6410. return DIV_ROUND_CLOSEST(parent_rate, (usbdiv + 1));
  6411. }
  6412. @@ -109,33 +108,31 @@
  6413. static int at91sam9x5_clk_usb_set_parent(struct clk_hw *hw, u8 index)
  6414. {
  6415. - u32 tmp;
  6416. struct at91sam9x5_clk_usb *usb = to_at91sam9x5_clk_usb(hw);
  6417. - struct at91_pmc *pmc = usb->pmc;
  6418. if (index > 1)
  6419. return -EINVAL;
  6420. - tmp = pmc_read(pmc, AT91_PMC_USB) & ~AT91_PMC_USBS;
  6421. - if (index)
  6422. - tmp |= AT91_PMC_USBS;
  6423. - pmc_write(pmc, AT91_PMC_USB, tmp);
  6424. +
  6425. + regmap_update_bits(usb->regmap, AT91_PMC_USB, AT91_PMC_USBS,
  6426. + index ? AT91_PMC_USBS : 0);
  6427. +
  6428. return 0;
  6429. }
  6430. static u8 at91sam9x5_clk_usb_get_parent(struct clk_hw *hw)
  6431. {
  6432. struct at91sam9x5_clk_usb *usb = to_at91sam9x5_clk_usb(hw);
  6433. - struct at91_pmc *pmc = usb->pmc;
  6434. + unsigned int usbr;
  6435. - return pmc_read(pmc, AT91_PMC_USB) & AT91_PMC_USBS;
  6436. + regmap_read(usb->regmap, AT91_PMC_USB, &usbr);
  6437. +
  6438. + return usbr & AT91_PMC_USBS;
  6439. }
  6440. static int at91sam9x5_clk_usb_set_rate(struct clk_hw *hw, unsigned long rate,
  6441. unsigned long parent_rate)
  6442. {
  6443. - u32 tmp;
  6444. struct at91sam9x5_clk_usb *usb = to_at91sam9x5_clk_usb(hw);
  6445. - struct at91_pmc *pmc = usb->pmc;
  6446. unsigned long div;
  6447. if (!rate)
  6448. @@ -145,9 +142,8 @@
  6449. if (div > SAM9X5_USB_MAX_DIV + 1 || !div)
  6450. return -EINVAL;
  6451. - tmp = pmc_read(pmc, AT91_PMC_USB) & ~AT91_PMC_OHCIUSBDIV;
  6452. - tmp |= (div - 1) << SAM9X5_USB_DIV_SHIFT;
  6453. - pmc_write(pmc, AT91_PMC_USB, tmp);
  6454. + regmap_update_bits(usb->regmap, AT91_PMC_USB, AT91_PMC_OHCIUSBDIV,
  6455. + (div - 1) << SAM9X5_USB_DIV_SHIFT);
  6456. return 0;
  6457. }
  6458. @@ -163,28 +159,28 @@
  6459. static int at91sam9n12_clk_usb_enable(struct clk_hw *hw)
  6460. {
  6461. struct at91sam9x5_clk_usb *usb = to_at91sam9x5_clk_usb(hw);
  6462. - struct at91_pmc *pmc = usb->pmc;
  6463. - pmc_write(pmc, AT91_PMC_USB,
  6464. - pmc_read(pmc, AT91_PMC_USB) | AT91_PMC_USBS);
  6465. + regmap_update_bits(usb->regmap, AT91_PMC_USB, AT91_PMC_USBS,
  6466. + AT91_PMC_USBS);
  6467. +
  6468. return 0;
  6469. }
  6470. static void at91sam9n12_clk_usb_disable(struct clk_hw *hw)
  6471. {
  6472. struct at91sam9x5_clk_usb *usb = to_at91sam9x5_clk_usb(hw);
  6473. - struct at91_pmc *pmc = usb->pmc;
  6474. - pmc_write(pmc, AT91_PMC_USB,
  6475. - pmc_read(pmc, AT91_PMC_USB) & ~AT91_PMC_USBS);
  6476. + regmap_update_bits(usb->regmap, AT91_PMC_USB, AT91_PMC_USBS, 0);
  6477. }
  6478. static int at91sam9n12_clk_usb_is_enabled(struct clk_hw *hw)
  6479. {
  6480. struct at91sam9x5_clk_usb *usb = to_at91sam9x5_clk_usb(hw);
  6481. - struct at91_pmc *pmc = usb->pmc;
  6482. + unsigned int usbr;
  6483. - return !!(pmc_read(pmc, AT91_PMC_USB) & AT91_PMC_USBS);
  6484. + regmap_read(usb->regmap, AT91_PMC_USB, &usbr);
  6485. +
  6486. + return usbr & AT91_PMC_USBS;
  6487. }
  6488. static const struct clk_ops at91sam9n12_usb_ops = {
  6489. @@ -197,7 +193,7 @@
  6490. };
  6491. static struct clk * __init
  6492. -at91sam9x5_clk_register_usb(struct at91_pmc *pmc, const char *name,
  6493. +at91sam9x5_clk_register_usb(struct regmap *regmap, const char *name,
  6494. const char **parent_names, u8 num_parents)
  6495. {
  6496. struct at91sam9x5_clk_usb *usb;
  6497. @@ -216,7 +212,7 @@
  6498. CLK_SET_RATE_PARENT;
  6499. usb->hw.init = &init;
  6500. - usb->pmc = pmc;
  6501. + usb->regmap = regmap;
  6502. clk = clk_register(NULL, &usb->hw);
  6503. if (IS_ERR(clk))
  6504. @@ -226,7 +222,7 @@
  6505. }
  6506. static struct clk * __init
  6507. -at91sam9n12_clk_register_usb(struct at91_pmc *pmc, const char *name,
  6508. +at91sam9n12_clk_register_usb(struct regmap *regmap, const char *name,
  6509. const char *parent_name)
  6510. {
  6511. struct at91sam9x5_clk_usb *usb;
  6512. @@ -244,7 +240,7 @@
  6513. init.flags = CLK_SET_RATE_GATE | CLK_SET_RATE_PARENT;
  6514. usb->hw.init = &init;
  6515. - usb->pmc = pmc;
  6516. + usb->regmap = regmap;
  6517. clk = clk_register(NULL, &usb->hw);
  6518. if (IS_ERR(clk))
  6519. @@ -257,12 +253,12 @@
  6520. unsigned long parent_rate)
  6521. {
  6522. struct at91rm9200_clk_usb *usb = to_at91rm9200_clk_usb(hw);
  6523. - struct at91_pmc *pmc = usb->pmc;
  6524. - u32 tmp;
  6525. + unsigned int pllbr;
  6526. u8 usbdiv;
  6527. - tmp = pmc_read(pmc, AT91_CKGR_PLLBR);
  6528. - usbdiv = (tmp & AT91_PMC_USBDIV) >> RM9200_USB_DIV_SHIFT;
  6529. + regmap_read(usb->regmap, AT91_CKGR_PLLBR, &pllbr);
  6530. +
  6531. + usbdiv = (pllbr & AT91_PMC_USBDIV) >> RM9200_USB_DIV_SHIFT;
  6532. if (usb->divisors[usbdiv])
  6533. return parent_rate / usb->divisors[usbdiv];
  6534. @@ -310,10 +306,8 @@
  6535. static int at91rm9200_clk_usb_set_rate(struct clk_hw *hw, unsigned long rate,
  6536. unsigned long parent_rate)
  6537. {
  6538. - u32 tmp;
  6539. int i;
  6540. struct at91rm9200_clk_usb *usb = to_at91rm9200_clk_usb(hw);
  6541. - struct at91_pmc *pmc = usb->pmc;
  6542. unsigned long div;
  6543. if (!rate)
  6544. @@ -323,10 +317,10 @@
  6545. for (i = 0; i < RM9200_USB_DIV_TAB_SIZE; i++) {
  6546. if (usb->divisors[i] == div) {
  6547. - tmp = pmc_read(pmc, AT91_CKGR_PLLBR) &
  6548. - ~AT91_PMC_USBDIV;
  6549. - tmp |= i << RM9200_USB_DIV_SHIFT;
  6550. - pmc_write(pmc, AT91_CKGR_PLLBR, tmp);
  6551. + regmap_update_bits(usb->regmap, AT91_CKGR_PLLBR,
  6552. + AT91_PMC_USBDIV,
  6553. + i << RM9200_USB_DIV_SHIFT);
  6554. +
  6555. return 0;
  6556. }
  6557. }
  6558. @@ -341,7 +335,7 @@
  6559. };
  6560. static struct clk * __init
  6561. -at91rm9200_clk_register_usb(struct at91_pmc *pmc, const char *name,
  6562. +at91rm9200_clk_register_usb(struct regmap *regmap, const char *name,
  6563. const char *parent_name, const u32 *divisors)
  6564. {
  6565. struct at91rm9200_clk_usb *usb;
  6566. @@ -359,7 +353,7 @@
  6567. init.flags = CLK_SET_RATE_PARENT;
  6568. usb->hw.init = &init;
  6569. - usb->pmc = pmc;
  6570. + usb->regmap = regmap;
  6571. memcpy(usb->divisors, divisors, sizeof(usb->divisors));
  6572. clk = clk_register(NULL, &usb->hw);
  6573. @@ -369,13 +363,13 @@
  6574. return clk;
  6575. }
  6576. -void __init of_at91sam9x5_clk_usb_setup(struct device_node *np,
  6577. - struct at91_pmc *pmc)
  6578. +static void __init of_at91sam9x5_clk_usb_setup(struct device_node *np)
  6579. {
  6580. struct clk *clk;
  6581. int num_parents;
  6582. const char *parent_names[USB_SOURCE_MAX];
  6583. const char *name = np->name;
  6584. + struct regmap *regmap;
  6585. num_parents = of_clk_get_parent_count(np);
  6586. if (num_parents <= 0 || num_parents > USB_SOURCE_MAX)
  6587. @@ -385,19 +379,26 @@
  6588. of_property_read_string(np, "clock-output-names", &name);
  6589. - clk = at91sam9x5_clk_register_usb(pmc, name, parent_names, num_parents);
  6590. + regmap = syscon_node_to_regmap(of_get_parent(np));
  6591. + if (IS_ERR(regmap))
  6592. + return;
  6593. +
  6594. + clk = at91sam9x5_clk_register_usb(regmap, name, parent_names,
  6595. + num_parents);
  6596. if (IS_ERR(clk))
  6597. return;
  6598. of_clk_add_provider(np, of_clk_src_simple_get, clk);
  6599. }
  6600. +CLK_OF_DECLARE(at91sam9x5_clk_usb, "atmel,at91sam9x5-clk-usb",
  6601. + of_at91sam9x5_clk_usb_setup);
  6602. -void __init of_at91sam9n12_clk_usb_setup(struct device_node *np,
  6603. - struct at91_pmc *pmc)
  6604. +static void __init of_at91sam9n12_clk_usb_setup(struct device_node *np)
  6605. {
  6606. struct clk *clk;
  6607. const char *parent_name;
  6608. const char *name = np->name;
  6609. + struct regmap *regmap;
  6610. parent_name = of_clk_get_parent_name(np, 0);
  6611. if (!parent_name)
  6612. @@ -405,20 +406,26 @@
  6613. of_property_read_string(np, "clock-output-names", &name);
  6614. - clk = at91sam9n12_clk_register_usb(pmc, name, parent_name);
  6615. + regmap = syscon_node_to_regmap(of_get_parent(np));
  6616. + if (IS_ERR(regmap))
  6617. + return;
  6618. +
  6619. + clk = at91sam9n12_clk_register_usb(regmap, name, parent_name);
  6620. if (IS_ERR(clk))
  6621. return;
  6622. of_clk_add_provider(np, of_clk_src_simple_get, clk);
  6623. }
  6624. +CLK_OF_DECLARE(at91sam9n12_clk_usb, "atmel,at91sam9n12-clk-usb",
  6625. + of_at91sam9n12_clk_usb_setup);
  6626. -void __init of_at91rm9200_clk_usb_setup(struct device_node *np,
  6627. - struct at91_pmc *pmc)
  6628. +static void __init of_at91rm9200_clk_usb_setup(struct device_node *np)
  6629. {
  6630. struct clk *clk;
  6631. const char *parent_name;
  6632. const char *name = np->name;
  6633. u32 divisors[4] = {0, 0, 0, 0};
  6634. + struct regmap *regmap;
  6635. parent_name = of_clk_get_parent_name(np, 0);
  6636. if (!parent_name)
  6637. @@ -430,9 +437,15 @@
  6638. of_property_read_string(np, "clock-output-names", &name);
  6639. - clk = at91rm9200_clk_register_usb(pmc, name, parent_name, divisors);
  6640. + regmap = syscon_node_to_regmap(of_get_parent(np));
  6641. + if (IS_ERR(regmap))
  6642. + return;
  6643. +
  6644. + clk = at91rm9200_clk_register_usb(regmap, name, parent_name, divisors);
  6645. if (IS_ERR(clk))
  6646. return;
  6647. of_clk_add_provider(np, of_clk_src_simple_get, clk);
  6648. }
  6649. +CLK_OF_DECLARE(at91rm9200_clk_usb, "atmel,at91rm9200-clk-usb",
  6650. + of_at91rm9200_clk_usb_setup);
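
For the at91sam9x5 USB clock the register field simply stores div - 1, where div is chosen with DIV_ROUND_CLOSEST() against the parent rate. A worked example of the arithmetic the callbacks above perform (the 480 MHz and 48 MHz figures are illustrative, not taken from the patch):

#include <linux/kernel.h>	/* DIV_ROUND_CLOSEST() */

/*
 * Example figures: a 480 MHz UTMI PLL parent and a 48 MHz target give
 * div = 10, so OHCIUSBDIV is programmed with 9 and recalc_rate() later
 * returns 480 MHz / (9 + 1) = 48 MHz.
 */
static unsigned int example_usb_div_field(unsigned long parent_rate,
					  unsigned long rate)
{
	unsigned long div = DIV_ROUND_CLOSEST(parent_rate, rate);

	return div - 1;		/* value stored in the register field */
}
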
  6651. diff -Nur linux-4.4.62.orig/drivers/clk/at91/clk-utmi.c linux-4.4.62/drivers/clk/at91/clk-utmi.c
  6652. --- linux-4.4.62.orig/drivers/clk/at91/clk-utmi.c 2017-04-18 07:15:37.000000000 +0200
  6653. +++ linux-4.4.62/drivers/clk/at91/clk-utmi.c 2017-04-18 17:38:08.046643506 +0200
  6654. @@ -11,14 +11,9 @@
  6655. #include <linux/clk-provider.h>
  6656. #include <linux/clkdev.h>
  6657. #include <linux/clk/at91_pmc.h>
  6658. -#include <linux/interrupt.h>
  6659. -#include <linux/irq.h>
  6660. #include <linux/of.h>
  6661. -#include <linux/of_address.h>
  6662. -#include <linux/of_irq.h>
  6663. -#include <linux/io.h>
  6664. -#include <linux/sched.h>
  6665. -#include <linux/wait.h>
  6666. +#include <linux/mfd/syscon.h>
  6667. +#include <linux/regmap.h>
  6668. #include "pmc.h"
  6669. @@ -26,37 +21,30 @@
  6670. struct clk_utmi {
  6671. struct clk_hw hw;
  6672. - struct at91_pmc *pmc;
  6673. - unsigned int irq;
  6674. - wait_queue_head_t wait;
  6675. + struct regmap *regmap;
  6676. };
  6677. #define to_clk_utmi(hw) container_of(hw, struct clk_utmi, hw)
  6678. -static irqreturn_t clk_utmi_irq_handler(int irq, void *dev_id)
  6679. +static inline bool clk_utmi_ready(struct regmap *regmap)
  6680. {
  6681. - struct clk_utmi *utmi = (struct clk_utmi *)dev_id;
  6682. + unsigned int status;
  6683. - wake_up(&utmi->wait);
  6684. - disable_irq_nosync(utmi->irq);
  6685. + regmap_read(regmap, AT91_PMC_SR, &status);
  6686. - return IRQ_HANDLED;
  6687. + return status & AT91_PMC_LOCKU;
  6688. }
  6689. static int clk_utmi_prepare(struct clk_hw *hw)
  6690. {
  6691. struct clk_utmi *utmi = to_clk_utmi(hw);
  6692. - struct at91_pmc *pmc = utmi->pmc;
  6693. - u32 tmp = pmc_read(pmc, AT91_CKGR_UCKR) | AT91_PMC_UPLLEN |
  6694. - AT91_PMC_UPLLCOUNT | AT91_PMC_BIASEN;
  6695. -
  6696. - pmc_write(pmc, AT91_CKGR_UCKR, tmp);
  6697. -
  6698. - while (!(pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_LOCKU)) {
  6699. - enable_irq(utmi->irq);
  6700. - wait_event(utmi->wait,
  6701. - pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_LOCKU);
  6702. - }
  6703. + unsigned int uckr = AT91_PMC_UPLLEN | AT91_PMC_UPLLCOUNT |
  6704. + AT91_PMC_BIASEN;
  6705. +
  6706. + regmap_update_bits(utmi->regmap, AT91_CKGR_UCKR, uckr, uckr);
  6707. +
  6708. + while (!clk_utmi_ready(utmi->regmap))
  6709. + cpu_relax();
  6710. return 0;
  6711. }
  6712. @@ -64,18 +52,15 @@
  6713. static int clk_utmi_is_prepared(struct clk_hw *hw)
  6714. {
  6715. struct clk_utmi *utmi = to_clk_utmi(hw);
  6716. - struct at91_pmc *pmc = utmi->pmc;
  6717. - return !!(pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_LOCKU);
  6718. + return clk_utmi_ready(utmi->regmap);
  6719. }
  6720. static void clk_utmi_unprepare(struct clk_hw *hw)
  6721. {
  6722. struct clk_utmi *utmi = to_clk_utmi(hw);
  6723. - struct at91_pmc *pmc = utmi->pmc;
  6724. - u32 tmp = pmc_read(pmc, AT91_CKGR_UCKR) & ~AT91_PMC_UPLLEN;
  6725. - pmc_write(pmc, AT91_CKGR_UCKR, tmp);
  6726. + regmap_update_bits(utmi->regmap, AT91_CKGR_UCKR, AT91_PMC_UPLLEN, 0);
  6727. }
  6728. static unsigned long clk_utmi_recalc_rate(struct clk_hw *hw,
  6729. @@ -93,10 +78,9 @@
  6730. };
  6731. static struct clk * __init
  6732. -at91_clk_register_utmi(struct at91_pmc *pmc, unsigned int irq,
  6733. +at91_clk_register_utmi(struct regmap *regmap,
  6734. const char *name, const char *parent_name)
  6735. {
  6736. - int ret;
  6737. struct clk_utmi *utmi;
  6738. struct clk *clk = NULL;
  6739. struct clk_init_data init;
  6740. @@ -112,52 +96,36 @@
  6741. init.flags = CLK_SET_RATE_GATE;
  6742. utmi->hw.init = &init;
  6743. - utmi->pmc = pmc;
  6744. - utmi->irq = irq;
  6745. - init_waitqueue_head(&utmi->wait);
  6746. - irq_set_status_flags(utmi->irq, IRQ_NOAUTOEN);
  6747. - ret = request_irq(utmi->irq, clk_utmi_irq_handler,
  6748. - IRQF_TRIGGER_HIGH, "clk-utmi", utmi);
  6749. - if (ret) {
  6750. - kfree(utmi);
  6751. - return ERR_PTR(ret);
  6752. - }
  6753. + utmi->regmap = regmap;
  6754. clk = clk_register(NULL, &utmi->hw);
  6755. - if (IS_ERR(clk)) {
  6756. - free_irq(utmi->irq, utmi);
  6757. + if (IS_ERR(clk))
  6758. kfree(utmi);
  6759. - }
  6760. return clk;
  6761. }
  6762. -static void __init
  6763. -of_at91_clk_utmi_setup(struct device_node *np, struct at91_pmc *pmc)
  6764. +static void __init of_at91sam9x5_clk_utmi_setup(struct device_node *np)
  6765. {
  6766. - unsigned int irq;
  6767. struct clk *clk;
  6768. const char *parent_name;
  6769. const char *name = np->name;
  6770. + struct regmap *regmap;
  6771. parent_name = of_clk_get_parent_name(np, 0);
  6772. of_property_read_string(np, "clock-output-names", &name);
  6773. - irq = irq_of_parse_and_map(np, 0);
  6774. - if (!irq)
  6775. + regmap = syscon_node_to_regmap(of_get_parent(np));
  6776. + if (IS_ERR(regmap))
  6777. return;
  6778. - clk = at91_clk_register_utmi(pmc, irq, name, parent_name);
  6779. + clk = at91_clk_register_utmi(regmap, name, parent_name);
  6780. if (IS_ERR(clk))
  6781. return;
  6782. of_clk_add_provider(np, of_clk_src_simple_get, clk);
  6783. return;
  6784. }
  6785. -
  6786. -void __init of_at91sam9x5_clk_utmi_setup(struct device_node *np,
  6787. - struct at91_pmc *pmc)
  6788. -{
  6789. - of_at91_clk_utmi_setup(np, pmc);
  6790. -}
  6791. +CLK_OF_DECLARE(at91sam9x5_clk_utmi, "atmel,at91sam9x5-clk-utmi",
  6792. + of_at91sam9x5_clk_utmi_setup);
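
With the PMC interrupt gone, waiting for the UTMI PLL lock becomes a plain cpu_relax() poll on AT91_PMC_SR, and that loop spins indefinitely if the PLL never locks. A bounded variant is easy to write if that ever becomes a concern; the following is only a sketch of the idea, not something the patch adds:

#include <linux/clk/at91_pmc.h>
#include <linux/errno.h>
#include <linux/jiffies.h>
#include <linux/regmap.h>
#include <asm/processor.h>	/* cpu_relax() */

/* poll AT91_PMC_SR for the UPLL lock bit, but give up after ~10 ms */
static int example_wait_upll_locked(struct regmap *regmap)
{
	unsigned long timeout = jiffies + msecs_to_jiffies(10);
	unsigned int status;

	do {
		regmap_read(regmap, AT91_PMC_SR, &status);
		if (status & AT91_PMC_LOCKU)
			return 0;
		cpu_relax();
	} while (time_before(jiffies, timeout));

	return -ETIMEDOUT;
}
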
  6793. diff -Nur linux-4.4.62.orig/drivers/clk/at91/pmc.c linux-4.4.62/drivers/clk/at91/pmc.c
  6794. --- linux-4.4.62.orig/drivers/clk/at91/pmc.c 2017-04-18 07:15:37.000000000 +0200
  6795. +++ linux-4.4.62/drivers/clk/at91/pmc.c 2017-04-18 17:38:08.046643506 +0200
  6796. @@ -12,36 +12,13 @@
  6797. #include <linux/clkdev.h>
  6798. #include <linux/clk/at91_pmc.h>
  6799. #include <linux/of.h>
  6800. -#include <linux/of_address.h>
  6801. -#include <linux/io.h>
  6802. -#include <linux/interrupt.h>
  6803. -#include <linux/irq.h>
  6804. -#include <linux/irqchip/chained_irq.h>
  6805. -#include <linux/irqdomain.h>
  6806. -#include <linux/of_irq.h>
  6807. +#include <linux/mfd/syscon.h>
  6808. +#include <linux/regmap.h>
  6809. #include <asm/proc-fns.h>
  6810. #include "pmc.h"
  6811. -void __iomem *at91_pmc_base;
  6812. -EXPORT_SYMBOL_GPL(at91_pmc_base);
  6813. -
  6814. -void at91rm9200_idle(void)
  6815. -{
  6816. - /*
  6817. - * Disable the processor clock. The processor will be automatically
  6818. - * re-enabled by an interrupt or by a reset.
  6819. - */
  6820. - at91_pmc_write(AT91_PMC_SCDR, AT91_PMC_PCK);
  6821. -}
  6822. -
  6823. -void at91sam9_idle(void)
  6824. -{
  6825. - at91_pmc_write(AT91_PMC_SCDR, AT91_PMC_PCK);
  6826. - cpu_do_idle();
  6827. -}
  6828. -
  6829. int of_at91_get_clk_range(struct device_node *np, const char *propname,
  6830. struct clk_range *range)
  6831. {
  6832. @@ -64,402 +41,3 @@
  6833. return 0;
  6834. }
  6835. EXPORT_SYMBOL_GPL(of_at91_get_clk_range);
  6836. -
  6837. -static void pmc_irq_mask(struct irq_data *d)
  6838. -{
  6839. - struct at91_pmc *pmc = irq_data_get_irq_chip_data(d);
  6840. -
  6841. - pmc_write(pmc, AT91_PMC_IDR, 1 << d->hwirq);
  6842. -}
  6843. -
  6844. -static void pmc_irq_unmask(struct irq_data *d)
  6845. -{
  6846. - struct at91_pmc *pmc = irq_data_get_irq_chip_data(d);
  6847. -
  6848. - pmc_write(pmc, AT91_PMC_IER, 1 << d->hwirq);
  6849. -}
  6850. -
  6851. -static int pmc_irq_set_type(struct irq_data *d, unsigned type)
  6852. -{
  6853. - if (type != IRQ_TYPE_LEVEL_HIGH) {
  6854. - pr_warn("PMC: type not supported (support only IRQ_TYPE_LEVEL_HIGH type)\n");
  6855. - return -EINVAL;
  6856. - }
  6857. -
  6858. - return 0;
  6859. -}
  6860. -
  6861. -static void pmc_irq_suspend(struct irq_data *d)
  6862. -{
  6863. - struct at91_pmc *pmc = irq_data_get_irq_chip_data(d);
  6864. -
  6865. - pmc->imr = pmc_read(pmc, AT91_PMC_IMR);
  6866. - pmc_write(pmc, AT91_PMC_IDR, pmc->imr);
  6867. -}
  6868. -
  6869. -static void pmc_irq_resume(struct irq_data *d)
  6870. -{
  6871. - struct at91_pmc *pmc = irq_data_get_irq_chip_data(d);
  6872. -
  6873. - pmc_write(pmc, AT91_PMC_IER, pmc->imr);
  6874. -}
  6875. -
  6876. -static struct irq_chip pmc_irq = {
  6877. - .name = "PMC",
  6878. - .irq_disable = pmc_irq_mask,
  6879. - .irq_mask = pmc_irq_mask,
  6880. - .irq_unmask = pmc_irq_unmask,
  6881. - .irq_set_type = pmc_irq_set_type,
  6882. - .irq_suspend = pmc_irq_suspend,
  6883. - .irq_resume = pmc_irq_resume,
  6884. -};
  6885. -
  6886. -static struct lock_class_key pmc_lock_class;
  6887. -
  6888. -static int pmc_irq_map(struct irq_domain *h, unsigned int virq,
  6889. - irq_hw_number_t hw)
  6890. -{
  6891. - struct at91_pmc *pmc = h->host_data;
  6892. -
  6893. - irq_set_lockdep_class(virq, &pmc_lock_class);
  6894. -
  6895. - irq_set_chip_and_handler(virq, &pmc_irq,
  6896. - handle_level_irq);
  6897. - irq_set_chip_data(virq, pmc);
  6898. -
  6899. - return 0;
  6900. -}
  6901. -
  6902. -static int pmc_irq_domain_xlate(struct irq_domain *d,
  6903. - struct device_node *ctrlr,
  6904. - const u32 *intspec, unsigned int intsize,
  6905. - irq_hw_number_t *out_hwirq,
  6906. - unsigned int *out_type)
  6907. -{
  6908. - struct at91_pmc *pmc = d->host_data;
  6909. - const struct at91_pmc_caps *caps = pmc->caps;
  6910. -
  6911. - if (WARN_ON(intsize < 1))
  6912. - return -EINVAL;
  6913. -
  6914. - *out_hwirq = intspec[0];
  6915. -
  6916. - if (!(caps->available_irqs & (1 << *out_hwirq)))
  6917. - return -EINVAL;
  6918. -
  6919. - *out_type = IRQ_TYPE_LEVEL_HIGH;
  6920. -
  6921. - return 0;
  6922. -}
  6923. -
  6924. -static const struct irq_domain_ops pmc_irq_ops = {
  6925. - .map = pmc_irq_map,
  6926. - .xlate = pmc_irq_domain_xlate,
  6927. -};
  6928. -
  6929. -static irqreturn_t pmc_irq_handler(int irq, void *data)
  6930. -{
  6931. - struct at91_pmc *pmc = (struct at91_pmc *)data;
  6932. - unsigned long sr;
  6933. - int n;
  6934. -
  6935. - sr = pmc_read(pmc, AT91_PMC_SR) & pmc_read(pmc, AT91_PMC_IMR);
  6936. - if (!sr)
  6937. - return IRQ_NONE;
  6938. -
  6939. - for_each_set_bit(n, &sr, BITS_PER_LONG)
  6940. - generic_handle_irq(irq_find_mapping(pmc->irqdomain, n));
  6941. -
  6942. - return IRQ_HANDLED;
  6943. -}
  6944. -
  6945. -static const struct at91_pmc_caps at91rm9200_caps = {
  6946. - .available_irqs = AT91_PMC_MOSCS | AT91_PMC_LOCKA | AT91_PMC_LOCKB |
  6947. - AT91_PMC_MCKRDY | AT91_PMC_PCK0RDY |
  6948. - AT91_PMC_PCK1RDY | AT91_PMC_PCK2RDY |
  6949. - AT91_PMC_PCK3RDY,
  6950. -};
  6951. -
  6952. -static const struct at91_pmc_caps at91sam9260_caps = {
  6953. - .available_irqs = AT91_PMC_MOSCS | AT91_PMC_LOCKA | AT91_PMC_LOCKB |
  6954. - AT91_PMC_MCKRDY | AT91_PMC_PCK0RDY |
  6955. - AT91_PMC_PCK1RDY,
  6956. -};
  6957. -
  6958. -static const struct at91_pmc_caps at91sam9g45_caps = {
  6959. - .available_irqs = AT91_PMC_MOSCS | AT91_PMC_LOCKA | AT91_PMC_MCKRDY |
  6960. - AT91_PMC_LOCKU | AT91_PMC_PCK0RDY |
  6961. - AT91_PMC_PCK1RDY,
  6962. -};
  6963. -
  6964. -static const struct at91_pmc_caps at91sam9n12_caps = {
  6965. - .available_irqs = AT91_PMC_MOSCS | AT91_PMC_LOCKA | AT91_PMC_LOCKB |
  6966. - AT91_PMC_MCKRDY | AT91_PMC_PCK0RDY |
  6967. - AT91_PMC_PCK1RDY | AT91_PMC_MOSCSELS |
  6968. - AT91_PMC_MOSCRCS | AT91_PMC_CFDEV,
  6969. -};
  6970. -
  6971. -static const struct at91_pmc_caps at91sam9x5_caps = {
  6972. - .available_irqs = AT91_PMC_MOSCS | AT91_PMC_LOCKA | AT91_PMC_MCKRDY |
  6973. - AT91_PMC_LOCKU | AT91_PMC_PCK0RDY |
  6974. - AT91_PMC_PCK1RDY | AT91_PMC_MOSCSELS |
  6975. - AT91_PMC_MOSCRCS | AT91_PMC_CFDEV,
  6976. -};
  6977. -
  6978. -static const struct at91_pmc_caps sama5d2_caps = {
  6979. - .available_irqs = AT91_PMC_MOSCS | AT91_PMC_LOCKA | AT91_PMC_MCKRDY |
  6980. - AT91_PMC_LOCKU | AT91_PMC_PCK0RDY |
  6981. - AT91_PMC_PCK1RDY | AT91_PMC_PCK2RDY |
  6982. - AT91_PMC_MOSCSELS | AT91_PMC_MOSCRCS |
  6983. - AT91_PMC_CFDEV | AT91_PMC_GCKRDY,
  6984. -};
  6985. -
  6986. -static const struct at91_pmc_caps sama5d3_caps = {
  6987. - .available_irqs = AT91_PMC_MOSCS | AT91_PMC_LOCKA | AT91_PMC_MCKRDY |
  6988. - AT91_PMC_LOCKU | AT91_PMC_PCK0RDY |
  6989. - AT91_PMC_PCK1RDY | AT91_PMC_PCK2RDY |
  6990. - AT91_PMC_MOSCSELS | AT91_PMC_MOSCRCS |
  6991. - AT91_PMC_CFDEV,
  6992. -};
  6993. -
  6994. -static struct at91_pmc *__init at91_pmc_init(struct device_node *np,
  6995. - void __iomem *regbase, int virq,
  6996. - const struct at91_pmc_caps *caps)
  6997. -{
  6998. - struct at91_pmc *pmc;
  6999. -
  7000. - if (!regbase || !virq || !caps)
  7001. - return NULL;
  7002. -
  7003. - at91_pmc_base = regbase;
  7004. -
  7005. - pmc = kzalloc(sizeof(*pmc), GFP_KERNEL);
  7006. - if (!pmc)
  7007. - return NULL;
  7008. -
  7009. - spin_lock_init(&pmc->lock);
  7010. - pmc->regbase = regbase;
  7011. - pmc->virq = virq;
  7012. - pmc->caps = caps;
  7013. -
  7014. - pmc->irqdomain = irq_domain_add_linear(np, 32, &pmc_irq_ops, pmc);
  7015. -
  7016. - if (!pmc->irqdomain)
  7017. - goto out_free_pmc;
  7018. -
  7019. - pmc_write(pmc, AT91_PMC_IDR, 0xffffffff);
  7020. - if (request_irq(pmc->virq, pmc_irq_handler,
  7021. - IRQF_SHARED | IRQF_COND_SUSPEND, "pmc", pmc))
  7022. - goto out_remove_irqdomain;
  7023. -
  7024. - return pmc;
  7025. -
  7026. -out_remove_irqdomain:
  7027. - irq_domain_remove(pmc->irqdomain);
  7028. -out_free_pmc:
  7029. - kfree(pmc);
  7030. -
  7031. - return NULL;
  7032. -}
  7033. -
  7034. -static const struct of_device_id pmc_clk_ids[] __initconst = {
  7035. - /* Slow oscillator */
  7036. - {
  7037. - .compatible = "atmel,at91sam9260-clk-slow",
  7038. - .data = of_at91sam9260_clk_slow_setup,
  7039. - },
  7040. - /* Main clock */
  7041. - {
  7042. - .compatible = "atmel,at91rm9200-clk-main-osc",
  7043. - .data = of_at91rm9200_clk_main_osc_setup,
  7044. - },
  7045. - {
  7046. - .compatible = "atmel,at91sam9x5-clk-main-rc-osc",
  7047. - .data = of_at91sam9x5_clk_main_rc_osc_setup,
  7048. - },
  7049. - {
  7050. - .compatible = "atmel,at91rm9200-clk-main",
  7051. - .data = of_at91rm9200_clk_main_setup,
  7052. - },
  7053. - {
  7054. - .compatible = "atmel,at91sam9x5-clk-main",
  7055. - .data = of_at91sam9x5_clk_main_setup,
  7056. - },
  7057. - /* PLL clocks */
  7058. - {
  7059. - .compatible = "atmel,at91rm9200-clk-pll",
  7060. - .data = of_at91rm9200_clk_pll_setup,
  7061. - },
  7062. - {
  7063. - .compatible = "atmel,at91sam9g45-clk-pll",
  7064. - .data = of_at91sam9g45_clk_pll_setup,
  7065. - },
  7066. - {
  7067. - .compatible = "atmel,at91sam9g20-clk-pllb",
  7068. - .data = of_at91sam9g20_clk_pllb_setup,
  7069. - },
  7070. - {
  7071. - .compatible = "atmel,sama5d3-clk-pll",
  7072. - .data = of_sama5d3_clk_pll_setup,
  7073. - },
  7074. - {
  7075. - .compatible = "atmel,at91sam9x5-clk-plldiv",
  7076. - .data = of_at91sam9x5_clk_plldiv_setup,
  7077. - },
  7078. - /* Master clock */
  7079. - {
  7080. - .compatible = "atmel,at91rm9200-clk-master",
  7081. - .data = of_at91rm9200_clk_master_setup,
  7082. - },
  7083. - {
  7084. - .compatible = "atmel,at91sam9x5-clk-master",
  7085. - .data = of_at91sam9x5_clk_master_setup,
  7086. - },
  7087. - /* System clocks */
  7088. - {
  7089. - .compatible = "atmel,at91rm9200-clk-system",
  7090. - .data = of_at91rm9200_clk_sys_setup,
  7091. - },
  7092. - /* Peripheral clocks */
  7093. - {
  7094. - .compatible = "atmel,at91rm9200-clk-peripheral",
  7095. - .data = of_at91rm9200_clk_periph_setup,
  7096. - },
  7097. - {
  7098. - .compatible = "atmel,at91sam9x5-clk-peripheral",
  7099. - .data = of_at91sam9x5_clk_periph_setup,
  7100. - },
  7101. - /* Programmable clocks */
  7102. - {
  7103. - .compatible = "atmel,at91rm9200-clk-programmable",
  7104. - .data = of_at91rm9200_clk_prog_setup,
  7105. - },
  7106. - {
  7107. - .compatible = "atmel,at91sam9g45-clk-programmable",
  7108. - .data = of_at91sam9g45_clk_prog_setup,
  7109. - },
  7110. - {
  7111. - .compatible = "atmel,at91sam9x5-clk-programmable",
  7112. - .data = of_at91sam9x5_clk_prog_setup,
  7113. - },
  7114. - /* UTMI clock */
  7115. -#if defined(CONFIG_HAVE_AT91_UTMI)
  7116. - {
  7117. - .compatible = "atmel,at91sam9x5-clk-utmi",
  7118. - .data = of_at91sam9x5_clk_utmi_setup,
  7119. - },
  7120. -#endif
  7121. - /* USB clock */
  7122. -#if defined(CONFIG_HAVE_AT91_USB_CLK)
  7123. - {
  7124. - .compatible = "atmel,at91rm9200-clk-usb",
  7125. - .data = of_at91rm9200_clk_usb_setup,
  7126. - },
  7127. - {
  7128. - .compatible = "atmel,at91sam9x5-clk-usb",
  7129. - .data = of_at91sam9x5_clk_usb_setup,
  7130. - },
  7131. - {
  7132. - .compatible = "atmel,at91sam9n12-clk-usb",
  7133. - .data = of_at91sam9n12_clk_usb_setup,
  7134. - },
  7135. -#endif
  7136. - /* SMD clock */
  7137. -#if defined(CONFIG_HAVE_AT91_SMD)
  7138. - {
  7139. - .compatible = "atmel,at91sam9x5-clk-smd",
  7140. - .data = of_at91sam9x5_clk_smd_setup,
  7141. - },
  7142. -#endif
  7143. -#if defined(CONFIG_HAVE_AT91_H32MX)
  7144. - {
  7145. - .compatible = "atmel,sama5d4-clk-h32mx",
  7146. - .data = of_sama5d4_clk_h32mx_setup,
  7147. - },
  7148. -#endif
  7149. -#if defined(CONFIG_HAVE_AT91_GENERATED_CLK)
  7150. - {
  7151. - .compatible = "atmel,sama5d2-clk-generated",
  7152. - .data = of_sama5d2_clk_generated_setup,
  7153. - },
  7154. -#endif
  7155. - { /*sentinel*/ }
  7156. -};
  7157. -
  7158. -static void __init of_at91_pmc_setup(struct device_node *np,
  7159. - const struct at91_pmc_caps *caps)
  7160. -{
  7161. - struct at91_pmc *pmc;
  7162. - struct device_node *childnp;
  7163. - void (*clk_setup)(struct device_node *, struct at91_pmc *);
  7164. - const struct of_device_id *clk_id;
  7165. - void __iomem *regbase = of_iomap(np, 0);
  7166. - int virq;
  7167. -
  7168. - if (!regbase)
  7169. - return;
  7170. -
  7171. - virq = irq_of_parse_and_map(np, 0);
  7172. - if (!virq)
  7173. - return;
  7174. -
  7175. - pmc = at91_pmc_init(np, regbase, virq, caps);
  7176. - if (!pmc)
  7177. - return;
  7178. - for_each_child_of_node(np, childnp) {
  7179. - clk_id = of_match_node(pmc_clk_ids, childnp);
  7180. - if (!clk_id)
  7181. - continue;
  7182. - clk_setup = clk_id->data;
  7183. - clk_setup(childnp, pmc);
  7184. - }
  7185. -}
  7186. -
  7187. -static void __init of_at91rm9200_pmc_setup(struct device_node *np)
  7188. -{
  7189. - of_at91_pmc_setup(np, &at91rm9200_caps);
  7190. -}
  7191. -CLK_OF_DECLARE(at91rm9200_clk_pmc, "atmel,at91rm9200-pmc",
  7192. - of_at91rm9200_pmc_setup);
  7193. -
  7194. -static void __init of_at91sam9260_pmc_setup(struct device_node *np)
  7195. -{
  7196. - of_at91_pmc_setup(np, &at91sam9260_caps);
  7197. -}
  7198. -CLK_OF_DECLARE(at91sam9260_clk_pmc, "atmel,at91sam9260-pmc",
  7199. - of_at91sam9260_pmc_setup);
  7200. -
  7201. -static void __init of_at91sam9g45_pmc_setup(struct device_node *np)
  7202. -{
  7203. - of_at91_pmc_setup(np, &at91sam9g45_caps);
  7204. -}
  7205. -CLK_OF_DECLARE(at91sam9g45_clk_pmc, "atmel,at91sam9g45-pmc",
  7206. - of_at91sam9g45_pmc_setup);
  7207. -
  7208. -static void __init of_at91sam9n12_pmc_setup(struct device_node *np)
  7209. -{
  7210. - of_at91_pmc_setup(np, &at91sam9n12_caps);
  7211. -}
  7212. -CLK_OF_DECLARE(at91sam9n12_clk_pmc, "atmel,at91sam9n12-pmc",
  7213. - of_at91sam9n12_pmc_setup);
  7214. -
  7215. -static void __init of_at91sam9x5_pmc_setup(struct device_node *np)
  7216. -{
  7217. - of_at91_pmc_setup(np, &at91sam9x5_caps);
  7218. -}
  7219. -CLK_OF_DECLARE(at91sam9x5_clk_pmc, "atmel,at91sam9x5-pmc",
  7220. - of_at91sam9x5_pmc_setup);
  7221. -
  7222. -static void __init of_sama5d2_pmc_setup(struct device_node *np)
  7223. -{
  7224. - of_at91_pmc_setup(np, &sama5d2_caps);
  7225. -}
  7226. -CLK_OF_DECLARE(sama5d2_clk_pmc, "atmel,sama5d2-pmc",
  7227. - of_sama5d2_pmc_setup);
  7228. -
  7229. -static void __init of_sama5d3_pmc_setup(struct device_node *np)
  7230. -{
  7231. - of_at91_pmc_setup(np, &sama5d3_caps);
  7232. -}
  7233. -CLK_OF_DECLARE(sama5d3_clk_pmc, "atmel,sama5d3-pmc",
  7234. - of_sama5d3_pmc_setup);
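
After this hunk pmc.c keeps only the of_at91_get_clk_range() helper: the per-SoC caps tables, the PMC irqchip and the of_device_id dispatch table all disappear because each sub-clock now registers itself through its own CLK_OF_DECLARE() entry. A short usage sketch of the surviving helper (the property name follows the existing at91 clock bindings; the function and CLK_RANGE() come straight from pmc.h):

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/of.h>
#include "pmc.h"

static void __init example_parse_range(struct device_node *np)
{
	struct clk_range range = CLK_RANGE(0, 0);

	/* fills <min max> from the property, leaves the default on error */
	of_at91_get_clk_range(np, "atmel,clk-output-range", &range);

	pr_info("%s: output range %lu..%lu Hz\n", np->name,
		range.min, range.max);
}
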
  7235. diff -Nur linux-4.4.62.orig/drivers/clk/at91/pmc.h linux-4.4.62/drivers/clk/at91/pmc.h
  7236. --- linux-4.4.62.orig/drivers/clk/at91/pmc.h 2017-04-18 07:15:37.000000000 +0200
  7237. +++ linux-4.4.62/drivers/clk/at91/pmc.h 2017-04-18 17:38:08.046643506 +0200
  7238. @@ -14,8 +14,11 @@
  7239. #include <linux/io.h>
  7240. #include <linux/irqdomain.h>
  7241. +#include <linux/regmap.h>
  7242. #include <linux/spinlock.h>
  7243. +extern spinlock_t pmc_pcr_lock;
  7244. +
  7245. struct clk_range {
  7246. unsigned long min;
  7247. unsigned long max;
  7248. @@ -23,102 +26,7 @@
  7249. #define CLK_RANGE(MIN, MAX) {.min = MIN, .max = MAX,}
  7250. -struct at91_pmc_caps {
  7251. - u32 available_irqs;
  7252. -};
  7253. -
  7254. -struct at91_pmc {
  7255. - void __iomem *regbase;
  7256. - int virq;
  7257. - spinlock_t lock;
  7258. - const struct at91_pmc_caps *caps;
  7259. - struct irq_domain *irqdomain;
  7260. - u32 imr;
  7261. -};
  7262. -
  7263. -static inline void pmc_lock(struct at91_pmc *pmc)
  7264. -{
  7265. - spin_lock(&pmc->lock);
  7266. -}
  7267. -
  7268. -static inline void pmc_unlock(struct at91_pmc *pmc)
  7269. -{
  7270. - spin_unlock(&pmc->lock);
  7271. -}
  7272. -
  7273. -static inline u32 pmc_read(struct at91_pmc *pmc, int offset)
  7274. -{
  7275. - return readl(pmc->regbase + offset);
  7276. -}
  7277. -
  7278. -static inline void pmc_write(struct at91_pmc *pmc, int offset, u32 value)
  7279. -{
  7280. - writel(value, pmc->regbase + offset);
  7281. -}
  7282. -
  7283. int of_at91_get_clk_range(struct device_node *np, const char *propname,
  7284. struct clk_range *range);
  7285. -void of_at91sam9260_clk_slow_setup(struct device_node *np,
  7286. - struct at91_pmc *pmc);
  7287. -
  7288. -void of_at91rm9200_clk_main_osc_setup(struct device_node *np,
  7289. - struct at91_pmc *pmc);
  7290. -void of_at91sam9x5_clk_main_rc_osc_setup(struct device_node *np,
  7291. - struct at91_pmc *pmc);
  7292. -void of_at91rm9200_clk_main_setup(struct device_node *np,
  7293. - struct at91_pmc *pmc);
  7294. -void of_at91sam9x5_clk_main_setup(struct device_node *np,
  7295. - struct at91_pmc *pmc);
  7296. -
  7297. -void of_at91rm9200_clk_pll_setup(struct device_node *np,
  7298. - struct at91_pmc *pmc);
  7299. -void of_at91sam9g45_clk_pll_setup(struct device_node *np,
  7300. - struct at91_pmc *pmc);
  7301. -void of_at91sam9g20_clk_pllb_setup(struct device_node *np,
  7302. - struct at91_pmc *pmc);
  7303. -void of_sama5d3_clk_pll_setup(struct device_node *np,
  7304. - struct at91_pmc *pmc);
  7305. -void of_at91sam9x5_clk_plldiv_setup(struct device_node *np,
  7306. - struct at91_pmc *pmc);
  7307. -
  7308. -void of_at91rm9200_clk_master_setup(struct device_node *np,
  7309. - struct at91_pmc *pmc);
  7310. -void of_at91sam9x5_clk_master_setup(struct device_node *np,
  7311. - struct at91_pmc *pmc);
  7312. -
  7313. -void of_at91rm9200_clk_sys_setup(struct device_node *np,
  7314. - struct at91_pmc *pmc);
  7315. -
  7316. -void of_at91rm9200_clk_periph_setup(struct device_node *np,
  7317. - struct at91_pmc *pmc);
  7318. -void of_at91sam9x5_clk_periph_setup(struct device_node *np,
  7319. - struct at91_pmc *pmc);
  7320. -
  7321. -void of_at91rm9200_clk_prog_setup(struct device_node *np,
  7322. - struct at91_pmc *pmc);
  7323. -void of_at91sam9g45_clk_prog_setup(struct device_node *np,
  7324. - struct at91_pmc *pmc);
  7325. -void of_at91sam9x5_clk_prog_setup(struct device_node *np,
  7326. - struct at91_pmc *pmc);
  7327. -
  7328. -void of_at91sam9x5_clk_utmi_setup(struct device_node *np,
  7329. - struct at91_pmc *pmc);
  7330. -
  7331. -void of_at91rm9200_clk_usb_setup(struct device_node *np,
  7332. - struct at91_pmc *pmc);
  7333. -void of_at91sam9x5_clk_usb_setup(struct device_node *np,
  7334. - struct at91_pmc *pmc);
  7335. -void of_at91sam9n12_clk_usb_setup(struct device_node *np,
  7336. - struct at91_pmc *pmc);
  7337. -
  7338. -void of_at91sam9x5_clk_smd_setup(struct device_node *np,
  7339. - struct at91_pmc *pmc);
  7340. -
  7341. -void of_sama5d4_clk_h32mx_setup(struct device_node *np,
  7342. - struct at91_pmc *pmc);
  7343. -
  7344. -void of_sama5d2_clk_generated_setup(struct device_node *np,
  7345. - struct at91_pmc *pmc);
  7346. -
  7347. #endif /* __PMC_H_ */
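
The header now exports little more than the clk_range helpers plus an extern pmc_pcr_lock. That spinlock is there because PMC_PCR is an index/data style register (the peripheral id is written first, then the same register is updated), so drivers elsewhere in this patch serialize the whole sequence with it. A representative sketch of how a caller is expected to use the lock (the EXAMPLE_PCR offset and field macros are illustrative, not the real at91_pmc.h definitions):

#include <linux/bitops.h>
#include <linux/regmap.h>
#include <linux/spinlock.h>
#include "pmc.h"

#define EXAMPLE_PCR		0x10c		/* illustrative offset only */
#define EXAMPLE_PCR_PID(id)	((id) & 0x3f)	/* illustrative id field */
#define EXAMPLE_PCR_CMD		BIT(12)		/* illustrative write bit */

/* select the peripheral id, then update its bits, under the shared lock */
static void example_pcr_update(struct regmap *regmap, unsigned int id,
			       unsigned int mask, unsigned int val)
{
	unsigned long flags;

	spin_lock_irqsave(&pmc_pcr_lock, flags);
	regmap_write(regmap, EXAMPLE_PCR, EXAMPLE_PCR_PID(id));
	regmap_update_bits(regmap, EXAMPLE_PCR, mask | EXAMPLE_PCR_CMD,
			   val | EXAMPLE_PCR_CMD);
	spin_unlock_irqrestore(&pmc_pcr_lock, flags);
}
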
  7348. diff -Nur linux-4.4.62.orig/drivers/clocksource/tcb_clksrc.c linux-4.4.62/drivers/clocksource/tcb_clksrc.c
  7349. --- linux-4.4.62.orig/drivers/clocksource/tcb_clksrc.c 2017-04-18 07:15:37.000000000 +0200
  7350. +++ linux-4.4.62/drivers/clocksource/tcb_clksrc.c 2017-04-18 17:38:08.046643506 +0200
  7351. @@ -23,8 +23,7 @@
  7352. * this 32 bit free-running counter. the second channel is not used.
  7353. *
  7354. * - The third channel may be used to provide a 16-bit clockevent
  7355. - * source, used in either periodic or oneshot mode. This runs
  7356. - * at 32 KiHZ, and can handle delays of up to two seconds.
  7357. + * source, used in either periodic or oneshot mode.
  7358. *
  7359. * A boot clocksource and clockevent source are also currently needed,
  7360. * unless the relevant platforms (ARM/AT91, AVR32/AT32) are changed so
  7361. @@ -74,6 +73,8 @@
  7362. struct tc_clkevt_device {
  7363. struct clock_event_device clkevt;
  7364. struct clk *clk;
  7365. + bool clk_enabled;
  7366. + u32 freq;
  7367. void __iomem *regs;
  7368. };
  7369. @@ -82,15 +83,26 @@
  7370. return container_of(clkevt, struct tc_clkevt_device, clkevt);
  7371. }
  7372. -/* For now, we always use the 32K clock ... this optimizes for NO_HZ,
  7373. - * because using one of the divided clocks would usually mean the
  7374. - * tick rate can never be less than several dozen Hz (vs 0.5 Hz).
  7375. - *
  7376. - * A divided clock could be good for high resolution timers, since
  7377. - * 30.5 usec resolution can seem "low".
  7378. - */
  7379. static u32 timer_clock;
  7380. +static void tc_clk_disable(struct clock_event_device *d)
  7381. +{
  7382. + struct tc_clkevt_device *tcd = to_tc_clkevt(d);
  7383. +
  7384. + clk_disable(tcd->clk);
  7385. + tcd->clk_enabled = false;
  7386. +}
  7387. +
  7388. +static void tc_clk_enable(struct clock_event_device *d)
  7389. +{
  7390. + struct tc_clkevt_device *tcd = to_tc_clkevt(d);
  7391. +
  7392. + if (tcd->clk_enabled)
  7393. + return;
  7394. + clk_enable(tcd->clk);
  7395. + tcd->clk_enabled = true;
  7396. +}
  7397. +
  7398. static int tc_shutdown(struct clock_event_device *d)
  7399. {
  7400. struct tc_clkevt_device *tcd = to_tc_clkevt(d);
  7401. @@ -98,8 +110,14 @@
  7402. __raw_writel(0xff, regs + ATMEL_TC_REG(2, IDR));
  7403. __raw_writel(ATMEL_TC_CLKDIS, regs + ATMEL_TC_REG(2, CCR));
  7404. + return 0;
  7405. +}
  7406. +
  7407. +static int tc_shutdown_clk_off(struct clock_event_device *d)
  7408. +{
  7409. + tc_shutdown(d);
  7410. if (!clockevent_state_detached(d))
  7411. - clk_disable(tcd->clk);
  7412. + tc_clk_disable(d);
  7413. return 0;
  7414. }
  7415. @@ -112,9 +130,9 @@
  7416. if (clockevent_state_oneshot(d) || clockevent_state_periodic(d))
  7417. tc_shutdown(d);
  7418. - clk_enable(tcd->clk);
  7419. + tc_clk_enable(d);
  7420. - /* slow clock, count up to RC, then irq and stop */
  7421. + /* count up to RC, then irq and stop */
  7422. __raw_writel(timer_clock | ATMEL_TC_CPCSTOP | ATMEL_TC_WAVE |
  7423. ATMEL_TC_WAVESEL_UP_AUTO, regs + ATMEL_TC_REG(2, CMR));
  7424. __raw_writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER));
  7425. @@ -134,12 +152,12 @@
  7426. /* By not making the gentime core emulate periodic mode on top
  7427. * of oneshot, we get lower overhead and improved accuracy.
  7428. */
  7429. - clk_enable(tcd->clk);
  7430. + tc_clk_enable(d);
  7431. - /* slow clock, count up to RC, then irq and restart */
  7432. + /* count up to RC, then irq and restart */
  7433. __raw_writel(timer_clock | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO,
  7434. regs + ATMEL_TC_REG(2, CMR));
  7435. - __raw_writel((32768 + HZ / 2) / HZ, tcaddr + ATMEL_TC_REG(2, RC));
  7436. + __raw_writel((tcd->freq + HZ / 2) / HZ, tcaddr + ATMEL_TC_REG(2, RC));
  7437. /* Enable clock and interrupts on RC compare */
  7438. __raw_writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER));
  7439. @@ -166,9 +184,13 @@
  7440. .features = CLOCK_EVT_FEAT_PERIODIC |
  7441. CLOCK_EVT_FEAT_ONESHOT,
  7442. /* Should be lower than at91rm9200's system timer */
  7443. +#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
  7444. .rating = 125,
  7445. +#else
  7446. + .rating = 200,
  7447. +#endif
  7448. .set_next_event = tc_next_event,
  7449. - .set_state_shutdown = tc_shutdown,
  7450. + .set_state_shutdown = tc_shutdown_clk_off,
  7451. .set_state_periodic = tc_set_periodic,
  7452. .set_state_oneshot = tc_set_oneshot,
  7453. },
  7454. @@ -188,8 +210,9 @@
  7455. return IRQ_NONE;
  7456. }
  7457. -static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx)
  7458. +static int __init setup_clkevents(struct atmel_tc *tc, int divisor_idx)
  7459. {
  7460. + unsigned divisor = atmel_tc_divisors[divisor_idx];
  7461. int ret;
  7462. struct clk *t2_clk = tc->clk[2];
  7463. int irq = tc->irq[2];
  7464. @@ -210,7 +233,11 @@
  7465. clkevt.regs = tc->regs;
  7466. clkevt.clk = t2_clk;
  7467. - timer_clock = clk32k_divisor_idx;
  7468. + timer_clock = divisor_idx;
  7469. + if (!divisor)
  7470. + clkevt.freq = 32768;
  7471. + else
  7472. + clkevt.freq = clk_get_rate(t2_clk) / divisor;
  7473. clkevt.clkevt.cpumask = cpumask_of(0);
  7474. @@ -221,7 +248,7 @@
  7475. return ret;
  7476. }
  7477. - clockevents_config_and_register(&clkevt.clkevt, 32768, 1, 0xffff);
  7478. + clockevents_config_and_register(&clkevt.clkevt, clkevt.freq, 1, 0xffff);
  7479. return ret;
  7480. }
  7481. @@ -358,7 +385,11 @@
  7482. goto err_disable_t1;
  7483. /* channel 2: periodic and oneshot timer support */
  7484. +#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
  7485. ret = setup_clkevents(tc, clk32k_divisor_idx);
  7486. +#else
  7487. + ret = setup_clkevents(tc, best_divisor_idx);
  7488. +#endif
  7489. if (ret)
  7490. goto err_unregister_clksrc;
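
The practical effect of running the clockevent from a divided master clock instead of the 32 kHz slow clock is a much finer tick and a much shorter maximum oneshot delay, which is why the reload value is now derived from clkevt.freq rather than hard-coded against 32768. A worked example with assumed numbers (a 132 MHz MCK and the MCK/32 divisor; neither value comes from the patch):

#include <linux/jiffies.h>	/* HZ */
#include <linux/kernel.h>

/*
 * Assumed numbers, not taken from the patch: a 132 MHz master clock
 * with the MCK/32 divisor gives a 4.125 MHz clockevent.  With HZ=100
 * the periodic reload is (4125000 + 50) / 100 = 41250 ticks (10 ms),
 * and the 16-bit counter limits oneshot delays to 65535 ticks, about
 * 15 ms, versus roughly 2 s at the old 32 kHz rate.
 */
static void example_tc_numbers(void)
{
	unsigned long freq = 132000000 / 32;		/* 4125000 Hz */
	unsigned long rc_periodic = (freq + HZ / 2) / HZ;
	unsigned long max_oneshot_ms = (0xffffUL * 1000) / freq;

	pr_info("tc clockevent: %lu Hz, rc=%lu, max oneshot ~%lu ms\n",
		freq, rc_periodic, max_oneshot_ms);
}
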
  7491. diff -Nur linux-4.4.62.orig/drivers/clocksource/timer-atmel-pit.c linux-4.4.62/drivers/clocksource/timer-atmel-pit.c
  7492. --- linux-4.4.62.orig/drivers/clocksource/timer-atmel-pit.c 2017-04-18 07:15:37.000000000 +0200
  7493. +++ linux-4.4.62/drivers/clocksource/timer-atmel-pit.c 2017-04-18 17:38:08.046643506 +0200
  7494. @@ -46,6 +46,7 @@
  7495. u32 cycle;
  7496. u32 cnt;
  7497. unsigned int irq;
  7498. + bool irq_requested;
  7499. struct clk *mck;
  7500. };
  7501. @@ -96,15 +97,29 @@
  7502. /* disable irq, leaving the clocksource active */
  7503. pit_write(data->base, AT91_PIT_MR, (data->cycle - 1) | AT91_PIT_PITEN);
  7504. + if (data->irq_requested) {
  7505. + free_irq(data->irq, data);
  7506. + data->irq_requested = false;
  7507. + }
  7508. return 0;
  7509. }
  7510. +static irqreturn_t at91sam926x_pit_interrupt(int irq, void *dev_id);
  7511. /*
  7512. * Clockevent device: interrupts every 1/HZ (== pit_cycles * MCK/16)
  7513. */
  7514. static int pit_clkevt_set_periodic(struct clock_event_device *dev)
  7515. {
  7516. struct pit_data *data = clkevt_to_pit_data(dev);
  7517. + int ret;
  7518. +
  7519. + ret = request_irq(data->irq, at91sam926x_pit_interrupt,
  7520. + IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL,
  7521. + "at91_tick", data);
  7522. + if (ret)
  7523. + panic(pr_fmt("Unable to setup IRQ\n"));
  7524. +
  7525. + data->irq_requested = true;
  7526. /* update clocksource counter */
  7527. data->cnt += data->cycle * PIT_PICNT(pit_read(data->base, AT91_PIT_PIVR));
  7528. @@ -181,7 +196,6 @@
  7529. {
  7530. unsigned long pit_rate;
  7531. unsigned bits;
  7532. - int ret;
  7533. /*
  7534. * Use our actual MCK to figure out how many MCK/16 ticks per
  7535. @@ -206,13 +220,6 @@
  7536. data->clksrc.flags = CLOCK_SOURCE_IS_CONTINUOUS;
  7537. clocksource_register_hz(&data->clksrc, pit_rate);
  7538. - /* Set up irq handler */
  7539. - ret = request_irq(data->irq, at91sam926x_pit_interrupt,
  7540. - IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL,
  7541. - "at91_tick", data);
  7542. - if (ret)
  7543. - panic(pr_fmt("Unable to setup IRQ\n"));
  7544. -
  7545. /* Set up and register clockevents */
  7546. data->clkevt.name = "pit";
  7547. data->clkevt.features = CLOCK_EVT_FEAT_PERIODIC;
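
The PIT change tracks the tick interrupt with irq_requested: request_irq() now happens in the set_state_periodic callback and free_irq() in shutdown, instead of a single request at init time. Stripped of the PIT specifics, the pattern looks like this (all example_* names are placeholders; only the clockevents structures and the request_irq()/free_irq() calls are the real APIs):

#include <linux/clockchips.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>

struct example_timer {
	struct clock_event_device clkevt;
	unsigned int irq;
	bool irq_requested;
};

static irqreturn_t example_tick(int irq, void *dev_id)
{
	/* acknowledge the hardware and call clkevt.event_handler() here */
	return IRQ_HANDLED;
}

static int example_set_periodic(struct clock_event_device *dev)
{
	struct example_timer *t = container_of(dev, struct example_timer, clkevt);

	if (!t->irq_requested &&
	    request_irq(t->irq, example_tick,
			IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL,
			"example_tick", t))
		return -EINVAL;
	t->irq_requested = true;

	/* program the hardware for 1/HZ interrupts here */
	return 0;
}

static int example_shutdown(struct clock_event_device *dev)
{
	struct example_timer *t = container_of(dev, struct example_timer, clkevt);

	/* stop the hardware here, then drop the interrupt */
	if (t->irq_requested) {
		free_irq(t->irq, t);
		t->irq_requested = false;
	}
	return 0;
}

static struct example_timer example_instance = {
	.clkevt = {
		.name			= "example",
		.features		= CLOCK_EVT_FEAT_PERIODIC,
		.set_state_periodic	= example_set_periodic,
		.set_state_shutdown	= example_shutdown,
	},
};
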
  7548. diff -Nur linux-4.4.62.orig/drivers/clocksource/timer-atmel-st.c linux-4.4.62/drivers/clocksource/timer-atmel-st.c
  7549. --- linux-4.4.62.orig/drivers/clocksource/timer-atmel-st.c 2017-04-18 07:15:37.000000000 +0200
  7550. +++ linux-4.4.62/drivers/clocksource/timer-atmel-st.c 2017-04-18 17:38:08.046643506 +0200
  7551. @@ -115,18 +115,29 @@
  7552. last_crtr = read_CRTR();
  7553. }
  7554. +static int atmel_st_irq;
  7555. +
  7556. static int clkevt32k_shutdown(struct clock_event_device *evt)
  7557. {
  7558. clkdev32k_disable_and_flush_irq();
  7559. irqmask = 0;
  7560. regmap_write(regmap_st, AT91_ST_IER, irqmask);
  7561. + free_irq(atmel_st_irq, regmap_st);
  7562. return 0;
  7563. }
  7564. static int clkevt32k_set_oneshot(struct clock_event_device *dev)
  7565. {
  7566. + int ret;
  7567. +
  7568. clkdev32k_disable_and_flush_irq();
  7569. + ret = request_irq(atmel_st_irq, at91rm9200_timer_interrupt,
  7570. + IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL,
  7571. + "at91_tick", regmap_st);
  7572. + if (ret)
  7573. + panic(pr_fmt("Unable to setup IRQ\n"));
  7574. +
  7575. /*
  7576. * ALM for oneshot irqs, set by next_event()
  7577. * before 32 seconds have passed.
  7578. @@ -139,8 +150,16 @@
  7579. static int clkevt32k_set_periodic(struct clock_event_device *dev)
  7580. {
  7581. + int ret;
  7582. +
  7583. clkdev32k_disable_and_flush_irq();
  7584. + ret = request_irq(atmel_st_irq, at91rm9200_timer_interrupt,
  7585. + IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL,
  7586. + "at91_tick", regmap_st);
  7587. + if (ret)
  7588. + panic(pr_fmt("Unable to setup IRQ\n"));
  7589. +
  7590. /* PIT for periodic irqs; fixed rate of 1/HZ */
  7591. irqmask = AT91_ST_PITS;
  7592. regmap_write(regmap_st, AT91_ST_PIMR, timer_latch);
  7593. @@ -198,7 +217,7 @@
  7594. {
  7595. struct clk *sclk;
  7596. unsigned int sclk_rate, val;
  7597. - int irq, ret;
  7598. + int ret;
  7599. regmap_st = syscon_node_to_regmap(node);
  7600. if (IS_ERR(regmap_st))
  7601. @@ -210,17 +229,10 @@
  7602. regmap_read(regmap_st, AT91_ST_SR, &val);
  7603. /* Get the interrupts property */
  7604. - irq = irq_of_parse_and_map(node, 0);
  7605. - if (!irq)
  7606. + atmel_st_irq = irq_of_parse_and_map(node, 0);
  7607. + if (!atmel_st_irq)
  7608. panic(pr_fmt("Unable to get IRQ from DT\n"));
  7609. - /* Make IRQs happen for the system timer */
  7610. - ret = request_irq(irq, at91rm9200_timer_interrupt,
  7611. - IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL,
  7612. - "at91_tick", regmap_st);
  7613. - if (ret)
  7614. - panic(pr_fmt("Unable to setup IRQ\n"));
  7615. -
  7616. sclk = of_clk_get(node, 0);
  7617. if (IS_ERR(sclk))
  7618. panic(pr_fmt("Unable to get slow clock\n"));
  7619. diff -Nur linux-4.4.62.orig/drivers/cpufreq/Kconfig.x86 linux-4.4.62/drivers/cpufreq/Kconfig.x86
  7620. --- linux-4.4.62.orig/drivers/cpufreq/Kconfig.x86 2017-04-18 07:15:37.000000000 +0200
  7621. +++ linux-4.4.62/drivers/cpufreq/Kconfig.x86 2017-04-18 17:38:08.046643506 +0200
  7622. @@ -123,7 +123,7 @@
  7623. config X86_POWERNOW_K8
  7624. tristate "AMD Opteron/Athlon64 PowerNow!"
  7625. - depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ
  7626. + depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ && !PREEMPT_RT_BASE
  7627. help
  7628. This adds the CPUFreq driver for K8/early Opteron/Athlon64 processors.
  7629. Support for K10 and newer processors is now in acpi-cpufreq.
  7630. diff -Nur linux-4.4.62.orig/drivers/cpuidle/coupled.c linux-4.4.62/drivers/cpuidle/coupled.c
  7631. --- linux-4.4.62.orig/drivers/cpuidle/coupled.c 2017-04-18 07:15:37.000000000 +0200
  7632. +++ linux-4.4.62/drivers/cpuidle/coupled.c 2017-04-18 17:38:08.046643506 +0200
  7633. @@ -119,7 +119,6 @@
  7634. #define CPUIDLE_COUPLED_NOT_IDLE (-1)
  7635. -static DEFINE_MUTEX(cpuidle_coupled_lock);
  7636. static DEFINE_PER_CPU(struct call_single_data, cpuidle_coupled_poke_cb);
  7637. /*
  7638. diff -Nur linux-4.4.62.orig/drivers/gpu/drm/i915/i915_gem_execbuffer.c linux-4.4.62/drivers/gpu/drm/i915/i915_gem_execbuffer.c
  7639. --- linux-4.4.62.orig/drivers/gpu/drm/i915/i915_gem_execbuffer.c 2017-04-18 07:15:37.000000000 +0200
  7640. +++ linux-4.4.62/drivers/gpu/drm/i915/i915_gem_execbuffer.c 2017-04-18 17:38:08.046643506 +0200
  7641. @@ -1264,7 +1264,9 @@
  7642. if (ret)
  7643. return ret;
  7644. +#ifndef CONFIG_PREEMPT_RT_BASE
  7645. trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
  7646. +#endif
  7647. i915_gem_execbuffer_move_to_active(vmas, params->request);
  7648. i915_gem_execbuffer_retire_commands(params);
  7649. diff -Nur linux-4.4.62.orig/drivers/gpu/drm/i915/i915_gem_shrinker.c linux-4.4.62/drivers/gpu/drm/i915/i915_gem_shrinker.c
  7650. --- linux-4.4.62.orig/drivers/gpu/drm/i915/i915_gem_shrinker.c 2017-04-18 07:15:37.000000000 +0200
  7651. +++ linux-4.4.62/drivers/gpu/drm/i915/i915_gem_shrinker.c 2017-04-18 17:38:08.046643506 +0200
  7652. @@ -39,7 +39,7 @@
  7653. if (!mutex_is_locked(mutex))
  7654. return false;
  7655. -#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER)
  7656. +#if (defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER)) && !defined(CONFIG_PREEMPT_RT_BASE)
  7657. return mutex->owner == task;
  7658. #else
  7659. /* Since UP may be pre-empted, we cannot assume that we own the lock */
  7660. diff -Nur linux-4.4.62.orig/drivers/gpu/drm/i915/i915_irq.c linux-4.4.62/drivers/gpu/drm/i915/i915_irq.c
  7661. --- linux-4.4.62.orig/drivers/gpu/drm/i915/i915_irq.c 2017-04-18 07:15:37.000000000 +0200
  7662. +++ linux-4.4.62/drivers/gpu/drm/i915/i915_irq.c 2017-04-18 17:38:08.046643506 +0200
  7663. @@ -812,6 +812,7 @@
  7664. spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
  7665. /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
  7666. + preempt_disable_rt();
  7667. /* Get optional system timestamp before query. */
  7668. if (stime)
  7669. @@ -863,6 +864,7 @@
  7670. *etime = ktime_get();
  7671. /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
  7672. + preempt_enable_rt();
  7673. spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
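
The comments in the original code already mark where preempt_disable_rt()/preempt_enable_rt() belong; the hunk above simply adds the calls. The helpers come from the preempt.h changes elsewhere in this patch and compile away on non-RT configurations. A sketch of the pattern (the register read and the function name are placeholders):

#include <linux/io.h>
#include <linux/ktime.h>
#include <linux/preempt.h>
#include <linux/timekeeping.h>

/* keep both timestamps and the hardware read in one non-preemptible window */
static u32 example_sample_counter(void __iomem *reg, ktime_t *stime,
				  ktime_t *etime)
{
	u32 position;

	preempt_disable_rt();

	*stime = ktime_get();
	position = readl(reg);		/* placeholder hardware read */
	*etime = ktime_get();

	preempt_enable_rt();

	return position;
}
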
  7674. diff -Nur linux-4.4.62.orig/drivers/gpu/drm/i915/intel_display.c linux-4.4.62/drivers/gpu/drm/i915/intel_display.c
  7675. --- linux-4.4.62.orig/drivers/gpu/drm/i915/intel_display.c 2017-04-18 07:15:37.000000000 +0200
  7676. +++ linux-4.4.62/drivers/gpu/drm/i915/intel_display.c 2017-04-18 17:38:08.050643661 +0200
  7677. @@ -11400,7 +11400,7 @@
  7678. struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
  7679. struct intel_unpin_work *work;
  7680. - WARN_ON(!in_interrupt());
  7681. + WARN_ON_NONRT(!in_interrupt());
  7682. if (crtc == NULL)
  7683. return;
  7684. diff -Nur linux-4.4.62.orig/drivers/gpu/drm/i915/intel_sprite.c linux-4.4.62/drivers/gpu/drm/i915/intel_sprite.c
  7685. --- linux-4.4.62.orig/drivers/gpu/drm/i915/intel_sprite.c 2017-04-18 07:15:37.000000000 +0200
  7686. +++ linux-4.4.62/drivers/gpu/drm/i915/intel_sprite.c 2017-04-18 17:38:08.050643661 +0200
  7687. @@ -38,6 +38,7 @@
  7688. #include "intel_drv.h"
  7689. #include <drm/i915_drm.h>
  7690. #include "i915_drv.h"
  7691. +#include <linux/locallock.h>
  7692. static bool
  7693. format_is_yuv(uint32_t format)
  7694. @@ -64,6 +65,8 @@
  7695. 1000 * adjusted_mode->crtc_htotal);
  7696. }
  7697. +static DEFINE_LOCAL_IRQ_LOCK(pipe_update_lock);
  7698. +
  7699. /**
  7700. * intel_pipe_update_start() - start update of a set of display registers
  7701. * @crtc: the crtc of which the registers are going to be updated
  7702. @@ -96,7 +99,7 @@
  7703. min = vblank_start - usecs_to_scanlines(adjusted_mode, 100);
  7704. max = vblank_start - 1;
  7705. - local_irq_disable();
  7706. + local_lock_irq(pipe_update_lock);
  7707. if (min <= 0 || max <= 0)
  7708. return;
  7709. @@ -126,11 +129,11 @@
  7710. break;
  7711. }
  7712. - local_irq_enable();
  7713. + local_unlock_irq(pipe_update_lock);
  7714. timeout = schedule_timeout(timeout);
  7715. - local_irq_disable();
  7716. + local_lock_irq(pipe_update_lock);
  7717. }
  7718. finish_wait(wq, &wait);
  7719. @@ -164,7 +167,7 @@
  7720. trace_i915_pipe_update_end(crtc, end_vbl_count, scanline_end);
  7721. - local_irq_enable();
  7722. + local_unlock_irq(pipe_update_lock);
  7723. if (crtc->debug.start_vbl_count &&
  7724. crtc->debug.start_vbl_count != end_vbl_count) {
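
Taken together, the intel_sprite.c changes above replace a bare interrupts-off window with a named local lock, so that on RT the pipe-update section is protected by a per-CPU lock that may sleep, while non-RT builds keep roughly the old local_irq_disable() behaviour. A minimal sketch of the same pattern using only the primitives that appear in the hunk (the lock and function names below are invented for illustration):

	#include <linux/locallock.h>

	static DEFINE_LOCAL_IRQ_LOCK(example_update_lock);

	static void example_update(void)
	{
		local_lock_irq(example_update_lock);	/* !RT: roughly local_irq_disable() */
		/* ... program registers that must be updated atomically ... */
		local_unlock_irq(example_update_lock);	/* !RT: roughly local_irq_enable() */
	}
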
  7725. diff -Nur linux-4.4.62.orig/drivers/gpu/drm/radeon/radeon_display.c linux-4.4.62/drivers/gpu/drm/radeon/radeon_display.c
  7726. --- linux-4.4.62.orig/drivers/gpu/drm/radeon/radeon_display.c 2017-04-18 07:15:37.000000000 +0200
  7727. +++ linux-4.4.62/drivers/gpu/drm/radeon/radeon_display.c 2017-04-18 17:38:08.050643661 +0200
  7728. @@ -1862,6 +1862,7 @@
  7729. struct radeon_device *rdev = dev->dev_private;
  7730. /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
  7731. + preempt_disable_rt();
  7732. /* Get optional system timestamp before query. */
  7733. if (stime)
  7734. @@ -1954,6 +1955,7 @@
  7735. *etime = ktime_get();
  7736. /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
  7737. + preempt_enable_rt();
  7738. /* Decode into vertical and horizontal scanout position. */
  7739. *vpos = position & 0x1fff;
  7740. diff -Nur linux-4.4.62.orig/drivers/hv/vmbus_drv.c linux-4.4.62/drivers/hv/vmbus_drv.c
  7741. --- linux-4.4.62.orig/drivers/hv/vmbus_drv.c 2017-04-18 07:15:37.000000000 +0200
  7742. +++ linux-4.4.62/drivers/hv/vmbus_drv.c 2017-04-18 17:38:08.050643661 +0200
  7743. @@ -820,7 +820,7 @@
  7744. tasklet_schedule(&msg_dpc);
  7745. }
  7746. - add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0);
  7747. + add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0, 0);
  7748. }
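
This caller now passes a third argument because the patch changes the add_interrupt_randomness() prototype elsewhere: on RT the entropy gathering is pushed out of the hard-IRQ path, so the caller has to hand over the extra context explicitly. A hedged guess at the new signature, inferred from this call site (the name and type of the third parameter are assumptions, not quoted from the patch):

	/* Assumed prototype; this Hyper-V callback has no useful instruction
	 * pointer to report, so it passes 0. */
	void add_interrupt_randomness(int irq, int irq_flags, __u64 ip);
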
  7749. diff -Nur linux-4.4.62.orig/drivers/i2c/busses/i2c-omap.c linux-4.4.62/drivers/i2c/busses/i2c-omap.c
  7750. --- linux-4.4.62.orig/drivers/i2c/busses/i2c-omap.c 2017-04-18 07:15:37.000000000 +0200
  7751. +++ linux-4.4.62/drivers/i2c/busses/i2c-omap.c 2017-04-18 17:38:08.050643661 +0200
  7752. @@ -995,15 +995,12 @@
  7753. u16 mask;
  7754. u16 stat;
  7755. - spin_lock(&omap->lock);
  7756. - mask = omap_i2c_read_reg(omap, OMAP_I2C_IE_REG);
  7757. stat = omap_i2c_read_reg(omap, OMAP_I2C_STAT_REG);
  7758. + mask = omap_i2c_read_reg(omap, OMAP_I2C_IE_REG);
  7759. if (stat & mask)
  7760. ret = IRQ_WAKE_THREAD;
  7761. - spin_unlock(&omap->lock);
  7762. -
  7763. return ret;
  7764. }
  7765. diff -Nur linux-4.4.62.orig/drivers/ide/alim15x3.c linux-4.4.62/drivers/ide/alim15x3.c
  7766. --- linux-4.4.62.orig/drivers/ide/alim15x3.c 2017-04-18 07:15:37.000000000 +0200
  7767. +++ linux-4.4.62/drivers/ide/alim15x3.c 2017-04-18 17:38:08.050643661 +0200
  7768. @@ -234,7 +234,7 @@
  7769. isa_dev = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, NULL);
  7770. - local_irq_save(flags);
  7771. + local_irq_save_nort(flags);
  7772. if (m5229_revision < 0xC2) {
  7773. /*
  7774. @@ -325,7 +325,7 @@
  7775. }
  7776. pci_dev_put(north);
  7777. pci_dev_put(isa_dev);
  7778. - local_irq_restore(flags);
  7779. + local_irq_restore_nort(flags);
  7780. return 0;
  7781. }
  7782. diff -Nur linux-4.4.62.orig/drivers/ide/hpt366.c linux-4.4.62/drivers/ide/hpt366.c
  7783. --- linux-4.4.62.orig/drivers/ide/hpt366.c 2017-04-18 07:15:37.000000000 +0200
  7784. +++ linux-4.4.62/drivers/ide/hpt366.c 2017-04-18 17:38:08.050643661 +0200
  7785. @@ -1241,7 +1241,7 @@
  7786. dma_old = inb(base + 2);
  7787. - local_irq_save(flags);
  7788. + local_irq_save_nort(flags);
  7789. dma_new = dma_old;
  7790. pci_read_config_byte(dev, hwif->channel ? 0x4b : 0x43, &masterdma);
  7791. @@ -1252,7 +1252,7 @@
  7792. if (dma_new != dma_old)
  7793. outb(dma_new, base + 2);
  7794. - local_irq_restore(flags);
  7795. + local_irq_restore_nort(flags);
  7796. printk(KERN_INFO " %s: BM-DMA at 0x%04lx-0x%04lx\n",
  7797. hwif->name, base, base + 7);
  7798. diff -Nur linux-4.4.62.orig/drivers/ide/ide-io.c linux-4.4.62/drivers/ide/ide-io.c
  7799. --- linux-4.4.62.orig/drivers/ide/ide-io.c 2017-04-18 07:15:37.000000000 +0200
  7800. +++ linux-4.4.62/drivers/ide/ide-io.c 2017-04-18 17:38:08.050643661 +0200
  7801. @@ -659,7 +659,7 @@
  7802. /* disable_irq_nosync ?? */
  7803. disable_irq(hwif->irq);
  7804. /* local CPU only, as if we were handling an interrupt */
  7805. - local_irq_disable();
  7806. + local_irq_disable_nort();
  7807. if (hwif->polling) {
  7808. startstop = handler(drive);
  7809. } else if (drive_is_ready(drive)) {
  7810. diff -Nur linux-4.4.62.orig/drivers/ide/ide-iops.c linux-4.4.62/drivers/ide/ide-iops.c
  7811. --- linux-4.4.62.orig/drivers/ide/ide-iops.c 2017-04-18 07:15:37.000000000 +0200
  7812. +++ linux-4.4.62/drivers/ide/ide-iops.c 2017-04-18 17:38:08.050643661 +0200
  7813. @@ -129,12 +129,12 @@
  7814. if ((stat & ATA_BUSY) == 0)
  7815. break;
  7816. - local_irq_restore(flags);
  7817. + local_irq_restore_nort(flags);
  7818. *rstat = stat;
  7819. return -EBUSY;
  7820. }
  7821. }
  7822. - local_irq_restore(flags);
  7823. + local_irq_restore_nort(flags);
  7824. }
  7825. /*
  7826. * Allow status to settle, then read it again.
  7827. diff -Nur linux-4.4.62.orig/drivers/ide/ide-io-std.c linux-4.4.62/drivers/ide/ide-io-std.c
  7828. --- linux-4.4.62.orig/drivers/ide/ide-io-std.c 2017-04-18 07:15:37.000000000 +0200
  7829. +++ linux-4.4.62/drivers/ide/ide-io-std.c 2017-04-18 17:38:08.050643661 +0200
  7830. @@ -175,7 +175,7 @@
  7831. unsigned long uninitialized_var(flags);
  7832. if ((io_32bit & 2) && !mmio) {
  7833. - local_irq_save(flags);
  7834. + local_irq_save_nort(flags);
  7835. ata_vlb_sync(io_ports->nsect_addr);
  7836. }
  7837. @@ -186,7 +186,7 @@
  7838. insl(data_addr, buf, words);
  7839. if ((io_32bit & 2) && !mmio)
  7840. - local_irq_restore(flags);
  7841. + local_irq_restore_nort(flags);
  7842. if (((len + 1) & 3) < 2)
  7843. return;
  7844. @@ -219,7 +219,7 @@
  7845. unsigned long uninitialized_var(flags);
  7846. if ((io_32bit & 2) && !mmio) {
  7847. - local_irq_save(flags);
  7848. + local_irq_save_nort(flags);
  7849. ata_vlb_sync(io_ports->nsect_addr);
  7850. }
  7851. @@ -230,7 +230,7 @@
  7852. outsl(data_addr, buf, words);
  7853. if ((io_32bit & 2) && !mmio)
  7854. - local_irq_restore(flags);
  7855. + local_irq_restore_nort(flags);
  7856. if (((len + 1) & 3) < 2)
  7857. return;
  7858. diff -Nur linux-4.4.62.orig/drivers/ide/ide-probe.c linux-4.4.62/drivers/ide/ide-probe.c
  7859. --- linux-4.4.62.orig/drivers/ide/ide-probe.c 2017-04-18 07:15:37.000000000 +0200
  7860. +++ linux-4.4.62/drivers/ide/ide-probe.c 2017-04-18 17:38:08.050643661 +0200
  7861. @@ -196,10 +196,10 @@
  7862. int bswap = 1;
  7863. /* local CPU only; some systems need this */
  7864. - local_irq_save(flags);
  7865. + local_irq_save_nort(flags);
  7866. /* read 512 bytes of id info */
  7867. hwif->tp_ops->input_data(drive, NULL, id, SECTOR_SIZE);
  7868. - local_irq_restore(flags);
  7869. + local_irq_restore_nort(flags);
  7870. drive->dev_flags |= IDE_DFLAG_ID_READ;
  7871. #ifdef DEBUG
  7872. diff -Nur linux-4.4.62.orig/drivers/ide/ide-taskfile.c linux-4.4.62/drivers/ide/ide-taskfile.c
  7873. --- linux-4.4.62.orig/drivers/ide/ide-taskfile.c 2017-04-18 07:15:37.000000000 +0200
  7874. +++ linux-4.4.62/drivers/ide/ide-taskfile.c 2017-04-18 17:38:08.050643661 +0200
  7875. @@ -250,7 +250,7 @@
  7876. page_is_high = PageHighMem(page);
  7877. if (page_is_high)
  7878. - local_irq_save(flags);
  7879. + local_irq_save_nort(flags);
  7880. buf = kmap_atomic(page) + offset;
  7881. @@ -271,7 +271,7 @@
  7882. kunmap_atomic(buf);
  7883. if (page_is_high)
  7884. - local_irq_restore(flags);
  7885. + local_irq_restore_nort(flags);
  7886. len -= nr_bytes;
  7887. }
  7888. @@ -414,7 +414,7 @@
  7889. }
  7890. if ((drive->dev_flags & IDE_DFLAG_UNMASK) == 0)
  7891. - local_irq_disable();
  7892. + local_irq_disable_nort();
  7893. ide_set_handler(drive, &task_pio_intr, WAIT_WORSTCASE);
  7894. diff -Nur linux-4.4.62.orig/drivers/infiniband/ulp/ipoib/ipoib_multicast.c linux-4.4.62/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
  7895. --- linux-4.4.62.orig/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2017-04-18 07:15:37.000000000 +0200
  7896. +++ linux-4.4.62/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2017-04-18 17:38:08.050643661 +0200
  7897. @@ -862,7 +862,7 @@
  7898. ipoib_dbg_mcast(priv, "restarting multicast task\n");
  7899. - local_irq_save(flags);
  7900. + local_irq_save_nort(flags);
  7901. netif_addr_lock(dev);
  7902. spin_lock(&priv->lock);
  7903. @@ -944,7 +944,7 @@
  7904. spin_unlock(&priv->lock);
  7905. netif_addr_unlock(dev);
  7906. - local_irq_restore(flags);
  7907. + local_irq_restore_nort(flags);
  7908. /*
  7909. * make sure the in-flight joins have finished before we attempt
  7910. diff -Nur linux-4.4.62.orig/drivers/input/gameport/gameport.c linux-4.4.62/drivers/input/gameport/gameport.c
  7911. --- linux-4.4.62.orig/drivers/input/gameport/gameport.c 2017-04-18 07:15:37.000000000 +0200
  7912. +++ linux-4.4.62/drivers/input/gameport/gameport.c 2017-04-18 17:38:08.054643816 +0200
  7913. @@ -91,13 +91,13 @@
  7914. tx = ~0;
  7915. for (i = 0; i < 50; i++) {
  7916. - local_irq_save(flags);
  7917. + local_irq_save_nort(flags);
  7918. t1 = ktime_get_ns();
  7919. for (t = 0; t < 50; t++)
  7920. gameport_read(gameport);
  7921. t2 = ktime_get_ns();
  7922. t3 = ktime_get_ns();
  7923. - local_irq_restore(flags);
  7924. + local_irq_restore_nort(flags);
  7925. udelay(i * 10);
  7926. t = (t2 - t1) - (t3 - t2);
  7927. if (t < tx)
  7928. @@ -124,12 +124,12 @@
  7929. tx = 1 << 30;
  7930. for(i = 0; i < 50; i++) {
  7931. - local_irq_save(flags);
  7932. + local_irq_save_nort(flags);
  7933. GET_TIME(t1);
  7934. for (t = 0; t < 50; t++) gameport_read(gameport);
  7935. GET_TIME(t2);
  7936. GET_TIME(t3);
  7937. - local_irq_restore(flags);
  7938. + local_irq_restore_nort(flags);
  7939. udelay(i * 10);
  7940. if ((t = DELTA(t2,t1) - DELTA(t3,t2)) < tx) tx = t;
  7941. }
  7942. @@ -148,11 +148,11 @@
  7943. tx = 1 << 30;
  7944. for(i = 0; i < 50; i++) {
  7945. - local_irq_save(flags);
  7946. + local_irq_save_nort(flags);
  7947. t1 = rdtsc();
  7948. for (t = 0; t < 50; t++) gameport_read(gameport);
  7949. t2 = rdtsc();
  7950. - local_irq_restore(flags);
  7951. + local_irq_restore_nort(flags);
  7952. udelay(i * 10);
  7953. if (t2 - t1 < tx) tx = t2 - t1;
  7954. }
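
All of the IDE, InfiniBand/IPoIB and gameport hunks above are the same mechanical conversion: IRQ-off sections that only exist to keep a timing measurement or legacy hardware access undisturbed become _nort ("not on RT") variants. A rough sketch of how those helpers are expected to behave (illustration only; the authoritative definitions are added to the irqflags headers elsewhere in this patch):

	/* Sketch: stock kernels keep the old semantics, RT builds stay
	 * preemptible and merely record the flags. */
	#ifdef CONFIG_PREEMPT_RT_FULL
	# define local_irq_disable_nort()	do { } while (0)
	# define local_irq_enable_nort()	do { } while (0)
	# define local_irq_save_nort(flags)	local_save_flags(flags)
	# define local_irq_restore_nort(flags)	(void)(flags)
	#else
	# define local_irq_disable_nort()	local_irq_disable()
	# define local_irq_enable_nort()	local_irq_enable()
	# define local_irq_save_nort(flags)	local_irq_save(flags)
	# define local_irq_restore_nort(flags)	local_irq_restore(flags)
	#endif
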
  7955. diff -Nur linux-4.4.62.orig/drivers/iommu/amd_iommu.c linux-4.4.62/drivers/iommu/amd_iommu.c
  7956. --- linux-4.4.62.orig/drivers/iommu/amd_iommu.c 2017-04-18 07:15:37.000000000 +0200
  7957. +++ linux-4.4.62/drivers/iommu/amd_iommu.c 2017-04-18 17:38:08.054643816 +0200
  7958. @@ -2022,10 +2022,10 @@
  7959. int ret;
  7960. /*
  7961. - * Must be called with IRQs disabled. Warn here to detect early
  7962. - * when its not.
  7963. + * Must be called with IRQs disabled on a non RT kernel. Warn here to
  7964. + * detect early when its not.
  7965. */
  7966. - WARN_ON(!irqs_disabled());
  7967. + WARN_ON_NONRT(!irqs_disabled());
  7968. /* lock domain */
  7969. spin_lock(&domain->lock);
  7970. @@ -2188,10 +2188,10 @@
  7971. struct protection_domain *domain;
  7972. /*
  7973. - * Must be called with IRQs disabled. Warn here to detect early
  7974. - * when its not.
  7975. + * Must be called with IRQs disabled on a non RT kernel. Warn here to
  7976. + * detect early when its not.
  7977. */
  7978. - WARN_ON(!irqs_disabled());
  7979. + WARN_ON_NONRT(!irqs_disabled());
  7980. if (WARN_ON(!dev_data->domain))
  7981. return;
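
WARN_ON_NONRT() here, like the BUG_ON_NONRT() in the dm.c hunk further down, expresses that the "interrupts must be disabled" invariant is deliberately relaxed on RT, so the assertion should only fire on a stock kernel. A plausible sketch of the pattern (the real macros are defined elsewhere in this patch; the guard symbol is an assumption):

	#ifdef CONFIG_PREEMPT_RT_BASE
	# define WARN_ON_NONRT(condition)	do { } while (0)
	# define BUG_ON_NONRT(condition)	do { } while (0)
	#else
	# define WARN_ON_NONRT(condition)	WARN_ON(condition)
	# define BUG_ON_NONRT(condition)	BUG_ON(condition)
	#endif
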
  7982. diff -Nur linux-4.4.62.orig/drivers/leds/trigger/Kconfig linux-4.4.62/drivers/leds/trigger/Kconfig
  7983. --- linux-4.4.62.orig/drivers/leds/trigger/Kconfig 2017-04-18 07:15:37.000000000 +0200
  7984. +++ linux-4.4.62/drivers/leds/trigger/Kconfig 2017-04-18 17:38:08.054643816 +0200
  7985. @@ -61,7 +61,7 @@
  7986. config LEDS_TRIGGER_CPU
  7987. bool "LED CPU Trigger"
  7988. - depends on LEDS_TRIGGERS
  7989. + depends on LEDS_TRIGGERS && !PREEMPT_RT_BASE
  7990. help
  7991. This allows LEDs to be controlled by active CPUs. This shows
  7992. the active CPUs across an array of LEDs so you can see which
  7993. diff -Nur linux-4.4.62.orig/drivers/md/bcache/Kconfig linux-4.4.62/drivers/md/bcache/Kconfig
  7994. --- linux-4.4.62.orig/drivers/md/bcache/Kconfig 2017-04-18 07:15:37.000000000 +0200
  7995. +++ linux-4.4.62/drivers/md/bcache/Kconfig 2017-04-18 17:38:08.054643816 +0200
  7996. @@ -1,6 +1,7 @@
  7997. config BCACHE
  7998. tristate "Block device as cache"
  7999. + depends on !PREEMPT_RT_FULL
  8000. ---help---
  8001. Allows a block device to be used as cache for other devices; uses
  8002. a btree for indexing and the layout is optimized for SSDs.
  8003. diff -Nur linux-4.4.62.orig/drivers/md/dm.c linux-4.4.62/drivers/md/dm.c
  8004. --- linux-4.4.62.orig/drivers/md/dm.c 2017-04-18 07:15:37.000000000 +0200
  8005. +++ linux-4.4.62/drivers/md/dm.c 2017-04-18 17:38:08.054643816 +0200
  8006. @@ -2185,7 +2185,7 @@
  8007. /* Establish tio->ti before queuing work (map_tio_request) */
  8008. tio->ti = ti;
  8009. queue_kthread_work(&md->kworker, &tio->work);
  8010. - BUG_ON(!irqs_disabled());
  8011. + BUG_ON_NONRT(!irqs_disabled());
  8012. }
  8013. goto out;
  8014. diff -Nur linux-4.4.62.orig/drivers/md/raid5.c linux-4.4.62/drivers/md/raid5.c
  8015. --- linux-4.4.62.orig/drivers/md/raid5.c 2017-04-18 07:15:37.000000000 +0200
  8016. +++ linux-4.4.62/drivers/md/raid5.c 2017-04-18 17:38:08.054643816 +0200
  8017. @@ -1920,8 +1920,9 @@
  8018. struct raid5_percpu *percpu;
  8019. unsigned long cpu;
  8020. - cpu = get_cpu();
  8021. + cpu = get_cpu_light();
  8022. percpu = per_cpu_ptr(conf->percpu, cpu);
  8023. + spin_lock(&percpu->lock);
  8024. if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) {
  8025. ops_run_biofill(sh);
  8026. overlap_clear++;
  8027. @@ -1977,7 +1978,8 @@
  8028. if (test_and_clear_bit(R5_Overlap, &dev->flags))
  8029. wake_up(&sh->raid_conf->wait_for_overlap);
  8030. }
  8031. - put_cpu();
  8032. + spin_unlock(&percpu->lock);
  8033. + put_cpu_light();
  8034. }
  8035. static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp)
  8036. @@ -6414,6 +6416,7 @@
  8037. __func__, cpu);
  8038. break;
  8039. }
  8040. + spin_lock_init(&per_cpu_ptr(conf->percpu, cpu)->lock);
  8041. }
  8042. put_online_cpus();
  8043. diff -Nur linux-4.4.62.orig/drivers/md/raid5.h linux-4.4.62/drivers/md/raid5.h
  8044. --- linux-4.4.62.orig/drivers/md/raid5.h 2017-04-18 07:15:37.000000000 +0200
  8045. +++ linux-4.4.62/drivers/md/raid5.h 2017-04-18 17:38:08.054643816 +0200
  8046. @@ -504,6 +504,7 @@
  8047. int recovery_disabled;
  8048. /* per cpu variables */
  8049. struct raid5_percpu {
  8050. + spinlock_t lock; /* Protection for -RT */
  8051. struct page *spare_page; /* Used when checking P/Q in raid6 */
  8052. struct flex_array *scribble; /* space for constructing buffer
  8053. * lists and performing address
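
The raid5 pair of hunks above illustrates the standard RT recipe for per-CPU scratch data: get_cpu()/put_cpu() become get_cpu_light()/put_cpu_light(), which on RT typically only pin the task to its CPU instead of disabling preemption, and a spinlock is added to the per-CPU structure to serialize the tasks that can now interleave on one CPU. A minimal sketch of the same idiom (structure and function names invented for illustration):

	struct example_percpu {
		spinlock_t lock;		/* added for -RT, as in raid5_percpu */
		/* ... per-CPU scratch buffers ... */
	};

	static void example_use(struct example_percpu __percpu *pcpu)
	{
		int cpu = get_cpu_light();	/* stays preemptible on RT */
		struct example_percpu *p = per_cpu_ptr(pcpu, cpu);

		spin_lock(&p->lock);		/* two tasks on one CPU may now race */
		/* ... use p's scratch space ... */
		spin_unlock(&p->lock);
		put_cpu_light();
	}
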
  8054. diff -Nur linux-4.4.62.orig/drivers/media/platform/vsp1/vsp1_video.c linux-4.4.62/drivers/media/platform/vsp1/vsp1_video.c
  8055. --- linux-4.4.62.orig/drivers/media/platform/vsp1/vsp1_video.c 2017-04-18 07:15:37.000000000 +0200
  8056. +++ linux-4.4.62/drivers/media/platform/vsp1/vsp1_video.c 2017-04-18 17:38:08.054643816 +0200
  8057. @@ -520,7 +520,7 @@
  8058. bool stopped;
  8059. spin_lock_irqsave(&pipe->irqlock, flags);
  8060. - stopped = pipe->state == VSP1_PIPELINE_STOPPED,
  8061. + stopped = pipe->state == VSP1_PIPELINE_STOPPED;
  8062. spin_unlock_irqrestore(&pipe->irqlock, flags);
  8063. return stopped;
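
The vsp1 hunk above is a one-character cleanup: the trailing comma chained the following spin_unlock_irqrestore() into the same expression statement via the comma operator, which is legal C but misleading; the semicolon restores two ordinary statements. A tiny, hypothetical user-space illustration of the pitfall:

	#include <stdio.h>

	int main(void)
	{
		int state = 1, stopped;

		stopped = (state == 1),		/* assignment happens first...          */
			printf("unlock\n");	/* ...then this call, all one
						 * comma-chained statement              */
		return stopped ? 0 : 1;
	}
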
  8064. diff -Nur linux-4.4.62.orig/drivers/misc/hwlat_detector.c linux-4.4.62/drivers/misc/hwlat_detector.c
  8065. --- linux-4.4.62.orig/drivers/misc/hwlat_detector.c 1970-01-01 01:00:00.000000000 +0100
  8066. +++ linux-4.4.62/drivers/misc/hwlat_detector.c 2017-04-18 17:38:08.054643816 +0200
  8067. @@ -0,0 +1,1240 @@
  8068. +/*
  8069. + * hwlat_detector.c - A simple Hardware Latency detector.
  8070. + *
  8071. + * Use this module to detect large system latencies induced by the behavior of
  8072. + * certain underlying system hardware or firmware, independent of Linux itself.
  8073. + * The code was developed originally to detect the presence of SMIs on Intel
  8074. + * and AMD systems, although there is no dependency upon x86 herein.
  8075. + *
  8076. + * The classical example usage of this module is in detecting the presence of
  8077. + * SMIs or System Management Interrupts on Intel and AMD systems. An SMI is a
  8078. + * somewhat special form of hardware interrupt spawned from earlier CPU debug
  8079. + * modes in which the (BIOS/EFI/etc.) firmware arranges for the South Bridge
  8080. + * LPC (or other device) to generate a special interrupt under certain
  8081. + * circumstances, for example, upon expiration of a special SMI timer device,
  8082. + * due to certain external thermal readings, on certain I/O address accesses,
  8083. + * and other situations. An SMI hits a special CPU pin, triggers a special
  8084. + * SMI mode (complete with special memory map), and the OS is unaware.
  8085. + *
  8086. + * Although certain hardware-inducing latencies are necessary (for example,
  8087. + * a modern system often requires an SMI handler for correct thermal control
  8088. + * and remote management) they can wreak havoc upon any OS-level performance
  8089. + * guarantees toward low-latency, especially when the OS is not even made
  8090. + * aware of the presence of these interrupts. For this reason, we need a
  8091. + * somewhat brute force mechanism to detect these interrupts. In this case,
  8092. + * we do it by hogging all of the CPU(s) for configurable timer intervals,
  8093. + * sampling the built-in CPU timer, looking for discontiguous readings.
  8094. + *
  8095. + * WARNING: This implementation necessarily introduces latencies. Therefore,
  8096. + * you should NEVER use this module in a production environment
  8097. + * requiring any kind of low-latency performance guarantee(s).
  8098. + *
  8099. + * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com>
  8100. + *
  8101. + * Includes useful feedback from Clark Williams <clark@redhat.com>
  8102. + *
  8103. + * This file is licensed under the terms of the GNU General Public
  8104. + * License version 2. This program is licensed "as is" without any
  8105. + * warranty of any kind, whether express or implied.
  8106. + */
  8107. +
  8108. +#include <linux/module.h>
  8109. +#include <linux/init.h>
  8110. +#include <linux/ring_buffer.h>
  8111. +#include <linux/time.h>
  8112. +#include <linux/hrtimer.h>
  8113. +#include <linux/kthread.h>
  8114. +#include <linux/debugfs.h>
  8115. +#include <linux/seq_file.h>
  8116. +#include <linux/uaccess.h>
  8117. +#include <linux/version.h>
  8118. +#include <linux/delay.h>
  8119. +#include <linux/slab.h>
  8120. +#include <linux/trace_clock.h>
  8121. +
  8122. +#define BUF_SIZE_DEFAULT 262144UL /* 8K*(sizeof(entry)) */
  8123. +#define BUF_FLAGS (RB_FL_OVERWRITE) /* no block on full */
  8124. +#define U64STR_SIZE 22 /* 20 digits max */
  8125. +
  8126. +#define VERSION "1.0.0"
  8127. +#define BANNER "hwlat_detector: "
  8128. +#define DRVNAME "hwlat_detector"
  8129. +#define DEFAULT_SAMPLE_WINDOW 1000000 /* 1s */
  8130. +#define DEFAULT_SAMPLE_WIDTH 500000 /* 0.5s */
  8131. +#define DEFAULT_LAT_THRESHOLD 10 /* 10us */
  8132. +
  8133. +/* Module metadata */
  8134. +
  8135. +MODULE_LICENSE("GPL");
  8136. +MODULE_AUTHOR("Jon Masters <jcm@redhat.com>");
  8137. +MODULE_DESCRIPTION("A simple hardware latency detector");
  8138. +MODULE_VERSION(VERSION);
  8139. +
  8140. +/* Module parameters */
  8141. +
  8142. +static int debug;
  8143. +static int enabled;
  8144. +static int threshold;
  8145. +
  8146. +module_param(debug, int, 0); /* enable debug */
  8147. +module_param(enabled, int, 0); /* enable detector */
  8148. +module_param(threshold, int, 0); /* latency threshold */
  8149. +
  8150. +/* Buffering and sampling */
  8151. +
  8152. +static struct ring_buffer *ring_buffer; /* sample buffer */
  8153. +static DEFINE_MUTEX(ring_buffer_mutex); /* lock changes */
  8154. +static unsigned long buf_size = BUF_SIZE_DEFAULT;
  8155. +static struct task_struct *kthread; /* sampling thread */
  8156. +
  8157. +/* DebugFS filesystem entries */
  8158. +
  8159. +static struct dentry *debug_dir; /* debugfs directory */
  8160. +static struct dentry *debug_max; /* maximum TSC delta */
  8161. +static struct dentry *debug_count; /* total detect count */
  8162. +static struct dentry *debug_sample_width; /* sample width us */
  8163. +static struct dentry *debug_sample_window; /* sample window us */
  8164. +static struct dentry *debug_sample; /* raw samples us */
  8165. +static struct dentry *debug_threshold; /* threshold us */
  8166. +static struct dentry *debug_enable; /* enable/disable */
  8167. +
  8168. +/* Individual samples and global state */
  8169. +
  8170. +struct sample; /* latency sample */
  8171. +struct data; /* Global state */
  8172. +
  8173. +/* Sampling functions */
  8174. +static int __buffer_add_sample(struct sample *sample);
  8175. +static struct sample *buffer_get_sample(struct sample *sample);
  8176. +
  8177. +/* Threading and state */
  8178. +static int kthread_fn(void *unused);
  8179. +static int start_kthread(void);
  8180. +static int stop_kthread(void);
  8181. +static void __reset_stats(void);
  8182. +static int init_stats(void);
  8183. +
  8184. +/* Debugfs interface */
  8185. +static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
  8186. + size_t cnt, loff_t *ppos, const u64 *entry);
  8187. +static ssize_t simple_data_write(struct file *filp, const char __user *ubuf,
  8188. + size_t cnt, loff_t *ppos, u64 *entry);
  8189. +static int debug_sample_fopen(struct inode *inode, struct file *filp);
  8190. +static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf,
  8191. + size_t cnt, loff_t *ppos);
  8192. +static int debug_sample_release(struct inode *inode, struct file *filp);
  8193. +static int debug_enable_fopen(struct inode *inode, struct file *filp);
  8194. +static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf,
  8195. + size_t cnt, loff_t *ppos);
  8196. +static ssize_t debug_enable_fwrite(struct file *file,
  8197. + const char __user *user_buffer,
  8198. + size_t user_size, loff_t *offset);
  8199. +
  8200. +/* Initialization functions */
  8201. +static int init_debugfs(void);
  8202. +static void free_debugfs(void);
  8203. +static int detector_init(void);
  8204. +static void detector_exit(void);
  8205. +
  8206. +/* Individual latency samples are stored here when detected and packed into
  8207. + * the ring_buffer circular buffer, where they are overwritten when
  8208. + * more than buf_size/sizeof(sample) samples are received. */
  8209. +struct sample {
  8210. + u64 seqnum; /* unique sequence */
  8211. + u64 duration; /* ktime delta */
  8212. + u64 outer_duration; /* ktime delta (outer loop) */
  8213. + struct timespec timestamp; /* wall time */
  8214. + unsigned long lost;
  8215. +};
  8216. +
  8217. +/* keep the global state somewhere. */
  8218. +static struct data {
  8219. +
  8220. + struct mutex lock; /* protect changes */
  8221. +
  8222. + u64 count; /* total since reset */
  8223. + u64 max_sample; /* max hardware latency */
  8224. + u64 threshold; /* sample threshold level */
  8225. +
  8226. + u64 sample_window; /* total sampling window (on+off) */
  8227. + u64 sample_width; /* active sampling portion of window */
  8228. +
  8229. + atomic_t sample_open; /* whether the sample file is open */
  8230. +
  8231. + wait_queue_head_t wq; /* waitqueue for new sample values */

  8232. +
  8233. +} data;
  8234. +
  8235. +/**
  8236. + * __buffer_add_sample - add a new latency sample recording to the ring buffer
  8237. + * @sample: The new latency sample value
  8238. + *
  8239. + * This receives a new latency sample and records it in a global ring buffer.
  8240. + * No additional locking is used in this case.
  8241. + */
  8242. +static int __buffer_add_sample(struct sample *sample)
  8243. +{
  8244. + return ring_buffer_write(ring_buffer,
  8245. + sizeof(struct sample), sample);
  8246. +}
  8247. +
  8248. +/**
  8249. + * buffer_get_sample - remove a hardware latency sample from the ring buffer
  8250. + * @sample: Pre-allocated storage for the sample
  8251. + *
  8252. + * This retrieves a hardware latency sample from the global circular buffer
  8253. + */
  8254. +static struct sample *buffer_get_sample(struct sample *sample)
  8255. +{
  8256. + struct ring_buffer_event *e = NULL;
  8257. + struct sample *s = NULL;
  8258. + unsigned int cpu = 0;
  8259. +
  8260. + if (!sample)
  8261. + return NULL;
  8262. +
  8263. + mutex_lock(&ring_buffer_mutex);
  8264. + for_each_online_cpu(cpu) {
  8265. + e = ring_buffer_consume(ring_buffer, cpu, NULL, &sample->lost);
  8266. + if (e)
  8267. + break;
  8268. + }
  8269. +
  8270. + if (e) {
  8271. + s = ring_buffer_event_data(e);
  8272. + memcpy(sample, s, sizeof(struct sample));
  8273. + } else
  8274. + sample = NULL;
  8275. + mutex_unlock(&ring_buffer_mutex);
  8276. +
  8277. + return sample;
  8278. +}
  8279. +
  8280. +#ifndef CONFIG_TRACING
  8281. +#define time_type ktime_t
  8282. +#define time_get() ktime_get()
  8283. +#define time_to_us(x) ktime_to_us(x)
  8284. +#define time_sub(a, b) ktime_sub(a, b)
  8285. +#define init_time(a, b) (a).tv64 = b
  8286. +#define time_u64(a) ((a).tv64)
  8287. +#else
  8288. +#define time_type u64
  8289. +#define time_get() trace_clock_local()
  8290. +#define time_to_us(x) div_u64(x, 1000)
  8291. +#define time_sub(a, b) ((a) - (b))
  8292. +#define init_time(a, b) (a = b)
  8293. +#define time_u64(a) a
  8294. +#endif
  8295. +/**
  8296. + * get_sample - sample the CPU TSC and look for likely hardware latencies
  8297. + *
  8298. + * Used to repeatedly capture the CPU TSC (or similar), looking for potential
  8299. + * hardware-induced latency. Called with interrupts disabled and with
  8300. + * data.lock held.
  8301. + */
  8302. +static int get_sample(void)
  8303. +{
  8304. + time_type start, t1, t2, last_t2;
  8305. + s64 diff, total = 0;
  8306. + u64 sample = 0;
  8307. + u64 outer_sample = 0;
  8308. + int ret = -1;
  8309. +
  8310. + init_time(last_t2, 0);
  8311. + start = time_get(); /* start timestamp */
  8312. +
  8313. + do {
  8314. +
  8315. + t1 = time_get(); /* we'll look for a discontinuity */
  8316. + t2 = time_get();
  8317. +
  8318. + if (time_u64(last_t2)) {
  8319. + /* Check the delta from outer loop (t2 to next t1) */
  8320. + diff = time_to_us(time_sub(t1, last_t2));
  8321. + /* This shouldn't happen */
  8322. + if (diff < 0) {
  8323. + pr_err(BANNER "time running backwards\n");
  8324. + goto out;
  8325. + }
  8326. + if (diff > outer_sample)
  8327. + outer_sample = diff;
  8328. + }
  8329. + last_t2 = t2;
  8330. +
  8331. + total = time_to_us(time_sub(t2, start)); /* sample width */
  8332. +
  8333. + /* This checks the inner loop (t1 to t2) */
  8334. + diff = time_to_us(time_sub(t2, t1)); /* current diff */
  8335. +
  8336. + /* This shouldn't happen */
  8337. + if (diff < 0) {
  8338. + pr_err(BANNER "time running backwards\n");
  8339. + goto out;
  8340. + }
  8341. +
  8342. + if (diff > sample)
  8343. + sample = diff; /* only want highest value */
  8344. +
  8345. + } while (total <= data.sample_width);
  8346. +
  8347. + ret = 0;
  8348. +
  8349. + /* If we exceed the threshold value, we have found a hardware latency */
  8350. + if (sample > data.threshold || outer_sample > data.threshold) {
  8351. + struct sample s;
  8352. +
  8353. + ret = 1;
  8354. +
  8355. + data.count++;
  8356. + s.seqnum = data.count;
  8357. + s.duration = sample;
  8358. + s.outer_duration = outer_sample;
  8359. + s.timestamp = CURRENT_TIME;
  8360. + __buffer_add_sample(&s);
  8361. +
  8362. + /* Keep a running maximum ever recorded hardware latency */
  8363. + if (sample > data.max_sample)
  8364. + data.max_sample = sample;
  8365. + }
  8366. +
  8367. +out:
  8368. + return ret;
  8369. +}
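
get_sample() above detects firmware-induced gaps by reading the clock twice back-to-back and also comparing one iteration's second reading against the next iteration's first: with interrupts off, both deltas should be tiny, so a large value means the CPU was taken away by something the kernel never saw (an SMI, for instance). The following is a hypothetical user-space analogue of the same loop, for illustration only; it cannot disable interrupts, so ordinary scheduling noise will show up as well:

	#include <stdio.h>
	#include <stdint.h>
	#include <time.h>

	static uint64_t now_us(void)
	{
		struct timespec ts;

		clock_gettime(CLOCK_MONOTONIC, &ts);
		return (uint64_t)ts.tv_sec * 1000000ull + ts.tv_nsec / 1000;
	}

	int main(void)
	{
		uint64_t start = now_us(), last_t2 = 0, max = 0;
		const uint64_t width_us = 500000;	/* DEFAULT_SAMPLE_WIDTH */

		do {
			uint64_t t1 = now_us();
			uint64_t t2 = now_us();

			if (last_t2 && t1 - last_t2 > max)	/* outer delta */
				max = t1 - last_t2;
			if (t2 - t1 > max)			/* inner delta */
				max = t2 - t1;
			last_t2 = t2;
		} while (now_us() - start <= width_us);

		printf("largest gap observed: %llu us\n",
		       (unsigned long long)max);
		return 0;
	}
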
  8370. +
  8371. +/*
  8372. + * kthread_fn - The CPU time sampling/hardware latency detection kernel thread
  8373. + * @unused: A required part of the kthread API.
  8374. + *
  8375. + * Used to periodically sample the CPU TSC via a call to get_sample. We
  8376. + * disable interrupts, which does (intentionally) introduce latency since we
  8377. + * need to ensure nothing else might be running (and thus pre-empting).
  8378. + * Obviously this should never be used in production environments.
  8379. + *
  8380. + * Currently this runs on whichever CPU it was scheduled on, but most
  8381. + * real-world hardware latency situations occur across several CPUs;
  8382. + * we might later generalize this if we find there are any actual
  8383. + * systems with alternate SMI delivery or other hardware latencies.
  8384. + */
  8385. +static int kthread_fn(void *unused)
  8386. +{
  8387. + int ret;
  8388. + u64 interval;
  8389. +
  8390. + while (!kthread_should_stop()) {
  8391. +
  8392. + mutex_lock(&data.lock);
  8393. +
  8394. + local_irq_disable();
  8395. + ret = get_sample();
  8396. + local_irq_enable();
  8397. +
  8398. + if (ret > 0)
  8399. + wake_up(&data.wq); /* wake up reader(s) */
  8400. +
  8401. + interval = data.sample_window - data.sample_width;
  8402. + do_div(interval, USEC_PER_MSEC); /* modifies interval value */
  8403. +
  8404. + mutex_unlock(&data.lock);
  8405. +
  8406. + if (msleep_interruptible(interval))
  8407. + break;
  8408. + }
  8409. +
  8410. + return 0;
  8411. +}
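
With the defaults defined at the top of this file, each pass of this loop keeps interrupts off for sample_width = 500000 us and then sleeps for (sample_window - sample_width) / USEC_PER_MSEC = (1000000 - 500000) / 1000 = 500 ms, so an enabled detector deliberately monopolizes its CPU for roughly half of every one-second window -- which is why the header comment warns against using it on production systems.
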
  8412. +
  8413. +/**
  8414. + * start_kthread - Kick off the hardware latency sampling/detector kthread
  8415. + *
  8416. + * This starts a kernel thread that will sit and sample the CPU timestamp
  8417. + * counter (TSC or similar) and look for potential hardware latencies.
  8418. + */
  8419. +static int start_kthread(void)
  8420. +{
  8421. + kthread = kthread_run(kthread_fn, NULL,
  8422. + DRVNAME);
  8423. + if (IS_ERR(kthread)) {
  8424. + pr_err(BANNER "could not start sampling thread\n");
  8425. + enabled = 0;
  8426. + return -ENOMEM;
  8427. + }
  8428. +
  8429. + return 0;
  8430. +}
  8431. +
  8432. +/**
  8433. + * stop_kthread - Inform the hardware latency sampling/detector kthread to stop
  8434. + *
  8435. + * This kicks the running hardware latency sampling/detector kernel thread and
  8436. + * tells it to stop sampling now. Use this on unload and at system shutdown.
  8437. + */
  8438. +static int stop_kthread(void)
  8439. +{
  8440. + int ret;
  8441. +
  8442. + ret = kthread_stop(kthread);
  8443. +
  8444. + return ret;
  8445. +}
  8446. +
  8447. +/**
  8448. + * __reset_stats - Reset statistics for the hardware latency detector
  8449. + *
  8450. + * We use data to store various statistics and global state. We call this
  8451. + * function in order to reset those when "enable" is toggled on or off, and
  8452. + * also at initialization. Should be called with data.lock held.
  8453. + */
  8454. +static void __reset_stats(void)
  8455. +{
  8456. + data.count = 0;
  8457. + data.max_sample = 0;
  8458. + ring_buffer_reset(ring_buffer); /* flush out old sample entries */
  8459. +}
  8460. +
  8461. +/**
  8462. + * init_stats - Setup global state statistics for the hardware latency detector
  8463. + *
  8464. + * We use data to store various statistics and global state. We also use
  8465. + * a global ring buffer (ring_buffer) to keep raw samples of detected hardware
  8466. + * induced system latencies. This function initializes these structures and
  8467. + * allocates the global ring buffer also.
  8468. + */
  8469. +static int init_stats(void)
  8470. +{
  8471. + int ret = -ENOMEM;
  8472. +
  8473. + mutex_init(&data.lock);
  8474. + init_waitqueue_head(&data.wq);
  8475. + atomic_set(&data.sample_open, 0);
  8476. +
  8477. + ring_buffer = ring_buffer_alloc(buf_size, BUF_FLAGS);
  8478. +
  8479. + if (WARN(!ring_buffer, KERN_ERR BANNER
  8480. + "failed to allocate ring buffer!\n"))
  8481. + goto out;
  8482. +
  8483. + __reset_stats();
  8484. + data.threshold = threshold ?: DEFAULT_LAT_THRESHOLD; /* threshold us */
  8485. + data.sample_window = DEFAULT_SAMPLE_WINDOW; /* window us */
  8486. + data.sample_width = DEFAULT_SAMPLE_WIDTH; /* width us */
  8487. +
  8488. + ret = 0;
  8489. +
  8490. +out:
  8491. + return ret;
  8492. +
  8493. +}
  8494. +
  8495. +/*
  8496. + * simple_data_read - Wrapper read function for global state debugfs entries
  8497. + * @filp: The active open file structure for the debugfs "file"
  8498. + * @ubuf: The userspace provided buffer to read value into
  8499. + * @cnt: The maximum number of bytes to read
  8500. + * @ppos: The current "file" position
  8501. + * @entry: The entry to read from
  8502. + *
  8503. + * This function provides a generic read implementation for the global state
  8504. + * "data" structure debugfs filesystem entries. It would be nice to use
  8505. + * simple_attr_read directly, but we need to make sure that the data.lock
  8506. + * is held during the actual read.
  8507. + */
  8508. +static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
  8509. + size_t cnt, loff_t *ppos, const u64 *entry)
  8510. +{
  8511. + char buf[U64STR_SIZE];
  8512. + u64 val = 0;
  8513. + int len = 0;
  8514. +
  8515. + memset(buf, 0, sizeof(buf));
  8516. +
  8517. + if (!entry)
  8518. + return -EFAULT;
  8519. +
  8520. + mutex_lock(&data.lock);
  8521. + val = *entry;
  8522. + mutex_unlock(&data.lock);
  8523. +
  8524. + len = snprintf(buf, sizeof(buf), "%llu\n", (unsigned long long)val);
  8525. +
  8526. + return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
  8527. +
  8528. +}
  8529. +
  8530. +/*
  8531. + * simple_data_write - Wrapper write function for global state debugfs entries
  8532. + * @filp: The active open file structure for the debugfs "file"
  8533. + * @ubuf: The userspace provided buffer to write value from
  8534. + * @cnt: The maximum number of bytes to write
  8535. + * @ppos: The current "file" position
  8536. + * @entry: The entry to write to
  8537. + *
  8538. + * This function provides a generic write implementation for the global state
  8539. + * "data" structure debugfs filesystem entries. It would be nice to use
  8540. + * simple_attr_write directly, but we need to make sure that the data.lock
  8541. + * is held during the actual write.
  8542. + */
  8543. +static ssize_t simple_data_write(struct file *filp, const char __user *ubuf,
  8544. + size_t cnt, loff_t *ppos, u64 *entry)
  8545. +{
  8546. + char buf[U64STR_SIZE];
  8547. + int csize = min(cnt, sizeof(buf));
  8548. + u64 val = 0;
  8549. + int err = 0;
  8550. +
  8551. + memset(buf, '\0', sizeof(buf));
  8552. + if (copy_from_user(buf, ubuf, csize))
  8553. + return -EFAULT;
  8554. +
  8555. + buf[U64STR_SIZE-1] = '\0'; /* just in case */
  8556. + err = kstrtoull(buf, 10, &val);
  8557. + if (err)
  8558. + return -EINVAL;
  8559. +
  8560. + mutex_lock(&data.lock);
  8561. + *entry = val;
  8562. + mutex_unlock(&data.lock);
  8563. +
  8564. + return csize;
  8565. +}
  8566. +
  8567. +/**
  8568. + * debug_count_fopen - Open function for "count" debugfs entry
  8569. + * @inode: The in-kernel inode representation of the debugfs "file"
  8570. + * @filp: The active open file structure for the debugfs "file"
  8571. + *
  8572. + * This function provides an open implementation for the "count" debugfs
  8573. + * interface to the hardware latency detector.
  8574. + */
  8575. +static int debug_count_fopen(struct inode *inode, struct file *filp)
  8576. +{
  8577. + return 0;
  8578. +}
  8579. +
  8580. +/**
  8581. + * debug_count_fread - Read function for "count" debugfs entry
  8582. + * @filp: The active open file structure for the debugfs "file"
  8583. + * @ubuf: The userspace provided buffer to read value into
  8584. + * @cnt: The maximum number of bytes to read
  8585. + * @ppos: The current "file" position
  8586. + *
  8587. + * This function provides a read implementation for the "count" debugfs
  8588. + * interface to the hardware latency detector. Can be used to read the
  8589. + * number of latency readings exceeding the configured threshold since
  8590. + * the detector was last reset (e.g. by writing a zero into "count").
  8591. + */
  8592. +static ssize_t debug_count_fread(struct file *filp, char __user *ubuf,
  8593. + size_t cnt, loff_t *ppos)
  8594. +{
  8595. + return simple_data_read(filp, ubuf, cnt, ppos, &data.count);
  8596. +}
  8597. +
  8598. +/**
  8599. + * debug_count_fwrite - Write function for "count" debugfs entry
  8600. + * @filp: The active open file structure for the debugfs "file"
  8601. + * @ubuf: The user buffer that contains the value to write
  8602. + * @cnt: The maximum number of bytes to write to "file"
  8603. + * @ppos: The current position in the debugfs "file"
  8604. + *
  8605. + * This function provides a write implementation for the "count" debugfs
  8606. + * interface to the hardware latency detector. Can be used to write a
  8607. + * desired value, especially to zero the total count.
  8608. + */
  8609. +static ssize_t debug_count_fwrite(struct file *filp,
  8610. + const char __user *ubuf,
  8611. + size_t cnt,
  8612. + loff_t *ppos)
  8613. +{
  8614. + return simple_data_write(filp, ubuf, cnt, ppos, &data.count);
  8615. +}
  8616. +
  8617. +/**
  8618. + * debug_enable_fopen - Dummy open function for "enable" debugfs interface
  8619. + * @inode: The in-kernel inode representation of the debugfs "file"
  8620. + * @filp: The active open file structure for the debugfs "file"
  8621. + *
  8622. + * This function provides an open implementation for the "enable" debugfs
  8623. + * interface to the hardware latency detector.
  8624. + */
  8625. +static int debug_enable_fopen(struct inode *inode, struct file *filp)
  8626. +{
  8627. + return 0;
  8628. +}
  8629. +
  8630. +/**
  8631. + * debug_enable_fread - Read function for "enable" debugfs interface
  8632. + * @filp: The active open file structure for the debugfs "file"
  8633. + * @ubuf: The userspace provided buffer to read value into
  8634. + * @cnt: The maximum number of bytes to read
  8635. + * @ppos: The current "file" position
  8636. + *
  8637. + * This function provides a read implementation for the "enable" debugfs
  8638. + * interface to the hardware latency detector. Can be used to determine
  8639. + * whether the detector is currently enabled ("0\n" or "1\n" returned).
  8640. + */
  8641. +static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf,
  8642. + size_t cnt, loff_t *ppos)
  8643. +{
  8644. + char buf[4];
  8645. +
  8646. + if ((cnt < sizeof(buf)) || (*ppos))
  8647. + return 0;
  8648. +
  8649. + buf[0] = enabled ? '1' : '0';
  8650. + buf[1] = '\n';
  8651. + buf[2] = '\0';
  8652. + if (copy_to_user(ubuf, buf, strlen(buf)))
  8653. + return -EFAULT;
  8654. + return *ppos = strlen(buf);
  8655. +}
  8656. +
  8657. +/**
  8658. + * debug_enable_fwrite - Write function for "enable" debugfs interface
  8659. + * @filp: The active open file structure for the debugfs "file"
  8660. + * @ubuf: The user buffer that contains the value to write
  8661. + * @cnt: The maximum number of bytes to write to "file"
  8662. + * @ppos: The current position in the debugfs "file"
  8663. + *
  8664. + * This function provides a write implementation for the "enable" debugfs
  8665. + * interface to the hardware latency detector. Can be used to enable or
  8666. + * disable the detector, which will have the side-effect of possibly
  8667. + * also resetting the global stats and kicking off the measuring
  8668. + * kthread (on an enable) or the converse (upon a disable).
  8669. + */
  8670. +static ssize_t debug_enable_fwrite(struct file *filp,
  8671. + const char __user *ubuf,
  8672. + size_t cnt,
  8673. + loff_t *ppos)
  8674. +{
  8675. + char buf[4];
  8676. + int csize = min(cnt, sizeof(buf));
  8677. + long val = 0;
  8678. + int err = 0;
  8679. +
  8680. + memset(buf, '\0', sizeof(buf));
  8681. + if (copy_from_user(buf, ubuf, csize))
  8682. + return -EFAULT;
  8683. +
  8684. + buf[sizeof(buf)-1] = '\0'; /* just in case */
  8685. + err = kstrtoul(buf, 10, &val);
  8686. + if (err)
  8687. + return -EINVAL;
  8688. +
  8689. + if (val) {
  8690. + if (enabled)
  8691. + goto unlock;
  8692. + enabled = 1;
  8693. + __reset_stats();
  8694. + if (start_kthread())
  8695. + return -EFAULT;
  8696. + } else {
  8697. + if (!enabled)
  8698. + goto unlock;
  8699. + enabled = 0;
  8700. + err = stop_kthread();
  8701. + if (err) {
  8702. + pr_err(BANNER "cannot stop kthread\n");
  8703. + return -EFAULT;
  8704. + }
  8705. + wake_up(&data.wq); /* reader(s) should return */
  8706. + }
  8707. +unlock:
  8708. + return csize;
  8709. +}
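
The read/write pairs above amount to a small debugfs control interface: writing a non-zero value to "enable" resets the statistics and starts the sampling kthread, writing zero stops it and wakes any blocked readers of "sample". A hypothetical user-space sketch of driving it, assuming debugfs is mounted at /sys/kernel/debug and that the directory created later in this file is named after DRVNAME ("hwlat_detector"):

	#include <stdio.h>

	int main(void)
	{
		const char *dir = "/sys/kernel/debug/hwlat_detector"; /* assumed path */
		char path[128], line[128];
		FILE *f;

		snprintf(path, sizeof(path), "%s/enable", dir);
		f = fopen(path, "w");
		if (!f)
			return 1;
		fputs("1\n", f);		/* debug_enable_fwrite(): start sampling */
		fclose(f);

		snprintf(path, sizeof(path), "%s/sample", dir);
		f = fopen(path, "r");		/* blocks until a latency is recorded */
		if (!f)
			return 1;
		while (fgets(line, sizeof(line), f))
			fputs(line, stdout);	/* sec.nsec<TAB>inner-us<TAB>outer-us */
		fclose(f);
		return 0;
	}
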
  8710. +
  8711. +/**
  8712. + * debug_max_fopen - Open function for "max" debugfs entry
  8713. + * @inode: The in-kernel inode representation of the debugfs "file"
  8714. + * @filp: The active open file structure for the debugfs "file"
  8715. + *
  8716. + * This function provides an open implementation for the "max" debugfs
  8717. + * interface to the hardware latency detector.
  8718. + */
  8719. +static int debug_max_fopen(struct inode *inode, struct file *filp)
  8720. +{
  8721. + return 0;
  8722. +}
  8723. +
  8724. +/**
  8725. + * debug_max_fread - Read function for "max" debugfs entry
  8726. + * @filp: The active open file structure for the debugfs "file"
  8727. + * @ubuf: The userspace provided buffer to read value into
  8728. + * @cnt: The maximum number of bytes to read
  8729. + * @ppos: The current "file" position
  8730. + *
  8731. + * This function provides a read implementation for the "max" debugfs
  8732. + * interface to the hardware latency detector. Can be used to determine
  8733. + * the maximum latency value observed since it was last reset.
  8734. + */
  8735. +static ssize_t debug_max_fread(struct file *filp, char __user *ubuf,
  8736. + size_t cnt, loff_t *ppos)
  8737. +{
  8738. + return simple_data_read(filp, ubuf, cnt, ppos, &data.max_sample);
  8739. +}
  8740. +
  8741. +/**
  8742. + * debug_max_fwrite - Write function for "max" debugfs entry
  8743. + * @filp: The active open file structure for the debugfs "file"
  8744. + * @ubuf: The user buffer that contains the value to write
  8745. + * @cnt: The maximum number of bytes to write to "file"
  8746. + * @ppos: The current position in the debugfs "file"
  8747. + *
  8748. + * This function provides a write implementation for the "max" debugfs
  8749. + * interface to the hardware latency detector. Can be used to reset the
  8750. + * maximum or set it to some other desired value - if, then, subsequent
  8751. + * measurements exceed this value, the maximum will be updated.
  8752. + */
  8753. +static ssize_t debug_max_fwrite(struct file *filp,
  8754. + const char __user *ubuf,
  8755. + size_t cnt,
  8756. + loff_t *ppos)
  8757. +{
  8758. + return simple_data_write(filp, ubuf, cnt, ppos, &data.max_sample);
  8759. +}
  8760. +
  8761. +
  8762. +/**
  8763. + * debug_sample_fopen - An open function for "sample" debugfs interface
  8764. + * @inode: The in-kernel inode representation of this debugfs "file"
  8765. + * @filp: The active open file structure for the debugfs "file"
  8766. + *
  8767. + * This function handles opening the "sample" file within the hardware
  8768. + * latency detector debugfs directory interface. This file is used to read
  8769. + * raw samples from the global ring_buffer and allows the user to see a
  8770. + * running latency history. Can be opened blocking or non-blocking,
  8771. + * affecting whether it behaves as a buffer read pipe, or does not.
  8772. + * Implements simple locking to prevent multiple simultaneous use.
  8773. + */
  8774. +static int debug_sample_fopen(struct inode *inode, struct file *filp)
  8775. +{
  8776. + if (!atomic_add_unless(&data.sample_open, 1, 1))
  8777. + return -EBUSY;
  8778. + else
  8779. + return 0;
  8780. +}
  8781. +
  8782. +/**
  8783. + * debug_sample_fread - A read function for "sample" debugfs interface
  8784. + * @filp: The active open file structure for the debugfs "file"
  8785. + * @ubuf: The user buffer that will contain the samples read
  8786. + * @cnt: The maximum bytes to read from the debugfs "file"
  8787. + * @ppos: The current position in the debugfs "file"
  8788. + *
  8789. + * This function handles reading from the "sample" file within the hardware
  8790. + * latency detector debugfs directory interface. This file is used to read
  8791. + * raw samples from the global ring_buffer and allows the user to see a
  8792. + * running latency history. By default this will block pending a new
  8793. + * value written into the sample buffer, unless there are already a
  8794. + * number of value(s) waiting in the buffer, or the sample file was
  8795. + * previously opened in a non-blocking mode of operation.
  8796. + */
  8797. +static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf,
  8798. + size_t cnt, loff_t *ppos)
  8799. +{
  8800. + int len = 0;
  8801. + char buf[64];
  8802. + struct sample *sample = NULL;
  8803. +
  8804. + if (!enabled)
  8805. + return 0;
  8806. +
  8807. + sample = kzalloc(sizeof(struct sample), GFP_KERNEL);
  8808. + if (!sample)
  8809. + return -ENOMEM;
  8810. +
  8811. + while (!buffer_get_sample(sample)) {
  8812. +
  8813. + DEFINE_WAIT(wait);
  8814. +
  8815. + if (filp->f_flags & O_NONBLOCK) {
  8816. + len = -EAGAIN;
  8817. + goto out;
  8818. + }
  8819. +
  8820. + prepare_to_wait(&data.wq, &wait, TASK_INTERRUPTIBLE);
  8821. + schedule();
  8822. + finish_wait(&data.wq, &wait);
  8823. +
  8824. + if (signal_pending(current)) {
  8825. + len = -EINTR;
  8826. + goto out;
  8827. + }
  8828. +
  8829. + if (!enabled) { /* enable was toggled */
  8830. + len = 0;
  8831. + goto out;
  8832. + }
  8833. + }
  8834. +
  8835. + len = snprintf(buf, sizeof(buf), "%010lu.%010lu\t%llu\t%llu\n",
  8836. + sample->timestamp.tv_sec,
  8837. + sample->timestamp.tv_nsec,
  8838. + sample->duration,
  8839. + sample->outer_duration);
  8840. +
  8841. +
  8842. + /* handling partial reads is more trouble than it's worth */
  8843. + if (len > cnt)
  8844. + goto out;
  8845. +
  8846. + if (copy_to_user(ubuf, buf, len))
  8847. + len = -EFAULT;
  8848. +
  8849. +out:
  8850. + kfree(sample);
  8851. + return len;
  8852. +}
  8853. +
  8854. +/**
  8855. + * debug_sample_release - Release function for "sample" debugfs interface
  8856. + * @inode: The in-kernel inode representation of the debugfs "file"
  8857. + * @filp: The active open file structure for the debugfs "file"
  8858. + *
  8859. + * This function completes the close of the debugfs interface "sample" file.
  8860. + * Frees the sample_open "lock" so that other users may open the interface.
  8861. + */
  8862. +static int debug_sample_release(struct inode *inode, struct file *filp)
  8863. +{
  8864. + atomic_dec(&data.sample_open);
  8865. +
  8866. + return 0;
  8867. +}
  8868. +
  8869. +/**
  8870. + * debug_threshold_fopen - Open function for "threshold" debugfs entry
  8871. + * @inode: The in-kernel inode representation of the debugfs "file"
  8872. + * @filp: The active open file structure for the debugfs "file"
  8873. + *
  8874. + * This function provides an open implementation for the "threshold" debugfs
  8875. + * interface to the hardware latency detector.
  8876. + */
  8877. +static int debug_threshold_fopen(struct inode *inode, struct file *filp)
  8878. +{
  8879. + return 0;
  8880. +}
  8881. +
  8882. +/**
  8883. + * debug_threshold_fread - Read function for "threshold" debugfs entry
  8884. + * @filp: The active open file structure for the debugfs "file"
  8885. + * @ubuf: The userspace provided buffer to read value into
  8886. + * @cnt: The maximum number of bytes to read
  8887. + * @ppos: The current "file" position
  8888. + *
  8889. + * This function provides a read implementation for the "threshold" debugfs
  8890. + * interface to the hardware latency detector. It can be used to determine
  8891. + * the current threshold level at which a latency will be recorded in the
  8892. + * global ring buffer, typically on the order of 10us.
  8893. + */
  8894. +static ssize_t debug_threshold_fread(struct file *filp, char __user *ubuf,
  8895. + size_t cnt, loff_t *ppos)
  8896. +{
  8897. + return simple_data_read(filp, ubuf, cnt, ppos, &data.threshold);
  8898. +}
  8899. +
  8900. +/**
  8901. + * debug_threshold_fwrite - Write function for "threshold" debugfs entry
  8902. + * @filp: The active open file structure for the debugfs "file"
  8903. + * @ubuf: The user buffer that contains the value to write
  8904. + * @cnt: The maximum number of bytes to write to "file"
  8905. + * @ppos: The current position in the debugfs "file"
  8906. + *
  8907. + * This function provides a write implementation for the "threshold" debugfs
  8908. + * interface to the hardware latency detector. It can be used to configure
  8909. + * the threshold level at which any subsequently detected latencies will
  8910. + * be recorded into the global ring buffer.
  8911. + */
  8912. +static ssize_t debug_threshold_fwrite(struct file *filp,
  8913. + const char __user *ubuf,
  8914. + size_t cnt,
  8915. + loff_t *ppos)
  8916. +{
  8917. + int ret;
  8918. +
  8919. + ret = simple_data_write(filp, ubuf, cnt, ppos, &data.threshold);
  8920. +
  8921. + if (enabled)
  8922. + wake_up_process(kthread);
  8923. +
  8924. + return ret;
  8925. +}
  8926. +
  8927. +/**
  8928. + * debug_width_fopen - Open function for "width" debugfs entry
  8929. + * @inode: The in-kernel inode representation of the debugfs "file"
  8930. + * @filp: The active open file structure for the debugfs "file"
  8931. + *
  8932. + * This function provides an open implementation for the "width" debugfs
  8933. + * interface to the hardware latency detector.
  8934. + */
  8935. +static int debug_width_fopen(struct inode *inode, struct file *filp)
  8936. +{
  8937. + return 0;
  8938. +}
  8939. +
  8940. +/**
  8941. + * debug_width_fread - Read function for "width" debugfs entry
  8942. + * @filp: The active open file structure for the debugfs "file"
  8943. + * @ubuf: The userspace provided buffer to read value into
  8944. + * @cnt: The maximum number of bytes to read
  8945. + * @ppos: The current "file" position
  8946. + *
  8947. + * This function provides a read implementation for the "width" debugfs
  8948. + * interface to the hardware latency detector. It can be used to determine
  8949. + * for how many us of the total window us we will actively sample for any
  8950. + * hardware-induced latency periods. Obviously, it is not possible to
  8951. + * sample constantly and have the system respond to a sample reader, or,
  8952. + * worse, without having the system appear to have gone out to lunch.
  8953. + */
  8954. +static ssize_t debug_width_fread(struct file *filp, char __user *ubuf,
  8955. + size_t cnt, loff_t *ppos)
  8956. +{
  8957. + return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_width);
  8958. +}
  8959. +
  8960. +/**
  8961. + * debug_width_fwrite - Write function for "width" debugfs entry
  8962. + * @filp: The active open file structure for the debugfs "file"
  8963. + * @ubuf: The user buffer that contains the value to write
  8964. + * @cnt: The maximum number of bytes to write to "file"
  8965. + * @ppos: The current position in the debugfs "file"
  8966. + *
  8967. + * This function provides a write implementation for the "width" debugfs
  8968. + * interface to the hardware latency detector. It can be used to configure
  8969. + * for how many us of the total window us we will actively sample for any
  8970. + * hardware-induced latency periods. Obviously, it is not possible to
  8971. + * sample constantly and have the system respond to a sample reader, or,
  8972. + * worse, without having the system appear to have gone out to lunch. It
  8973. + * is enforced that width is less than the total window size.
  8974. + */
  8975. +static ssize_t debug_width_fwrite(struct file *filp,
  8976. + const char __user *ubuf,
  8977. + size_t cnt,
  8978. + loff_t *ppos)
  8979. +{
  8980. + char buf[U64STR_SIZE];
  8981. + int csize = min(cnt, sizeof(buf));
  8982. + u64 val = 0;
  8983. + int err = 0;
  8984. +
  8985. + memset(buf, '\0', sizeof(buf));
  8986. + if (copy_from_user(buf, ubuf, csize))
  8987. + return -EFAULT;
  8988. +
  8989. + buf[U64STR_SIZE-1] = '\0'; /* just in case */
  8990. + err = kstrtoull(buf, 10, &val);
  8991. + if (err)
  8992. + return -EINVAL;
  8993. +
  8994. + mutex_lock(&data.lock);
  8995. + if (val < data.sample_window)
  8996. + data.sample_width = val;
  8997. + else {
  8998. + mutex_unlock(&data.lock);
  8999. + return -EINVAL;
  9000. + }
  9001. + mutex_unlock(&data.lock);
  9002. +
  9003. + if (enabled)
  9004. + wake_up_process(kthread);
  9005. +
  9006. + return csize;
  9007. +}
  9008. +
  9009. +/**
  9010. + * debug_window_fopen - Open function for "window" debugfs entry
  9011. + * @inode: The in-kernel inode representation of the debugfs "file"
  9012. + * @filp: The active open file structure for the debugfs "file"
  9013. + *
  9014. + * This function provides an open implementation for the "window" debugfs
  9015. + * interface to the hardware latency detector. The window is the total time
  9016. + * in us that will be considered one sample period. Conceptually, windows
  9017. + * occur back-to-back and contain a sample width period during which
  9018. + * actual sampling occurs.
  9019. + */
  9020. +static int debug_window_fopen(struct inode *inode, struct file *filp)
  9021. +{
  9022. + return 0;
  9023. +}
  9024. +
  9025. +/**
  9026. + * debug_window_fread - Read function for "window" debugfs entry
  9027. + * @filp: The active open file structure for the debugfs "file"
  9028. + * @ubuf: The userspace provided buffer to read value into
  9029. + * @cnt: The maximum number of bytes to read
  9030. + * @ppos: The current "file" position
  9031. + *
  9032. + * This function provides a read implementation for the "window" debugfs
  9033. + * interface to the hardware latency detector. The window is the total time
  9034. + * in us that will be considered one sample period. Conceptually, windows
  9035. + * occur back-to-back and contain a sample width period during which
  9036. + * actual sampling occurs. Can be used to read the total window size.
  9037. + */
  9038. +static ssize_t debug_window_fread(struct file *filp, char __user *ubuf,
  9039. + size_t cnt, loff_t *ppos)
  9040. +{
  9041. + return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_window);
  9042. +}
  9043. +
  9044. +/**
  9045. + * debug_window_fwrite - Write function for "window" debugfs entry
  9046. + * @filp: The active open file structure for the debugfs "file"
  9047. + * @ubuf: The user buffer that contains the value to write
  9048. + * @cnt: The maximum number of bytes to write to "file"
  9049. + * @ppos: The current position in the debugfs "file"
  9050. + *
  9051. + * This function provides a write implementation for the "window" debugfs
  9052. + * interface to the hardware latency detector. The window is the total time
  9053. + * in us that will be considered one sample period. Conceptually, windows
  9054. + * occur back-to-back and contain a sample width period during which
  9055. + * actual sampling occurs. Can be used to write a new total window size. It
  9056. + * is enforced that any value written must be greater than the sample width
  9057. + * size, or an error results.
  9058. + */
  9059. +static ssize_t debug_window_fwrite(struct file *filp,
  9060. + const char __user *ubuf,
  9061. + size_t cnt,
  9062. + loff_t *ppos)
  9063. +{
  9064. + char buf[U64STR_SIZE];
  9065. + int csize = min(cnt, sizeof(buf));
  9066. + u64 val = 0;
  9067. + int err = 0;
  9068. +
  9069. + memset(buf, '\0', sizeof(buf));
  9070. + if (copy_from_user(buf, ubuf, csize))
  9071. + return -EFAULT;
  9072. +
  9073. + buf[U64STR_SIZE-1] = '\0'; /* just in case */
  9074. + err = kstrtoull(buf, 10, &val);
  9075. + if (err)
  9076. + return -EINVAL;
  9077. +
  9078. + mutex_lock(&data.lock);
  9079. + if (data.sample_width < val)
  9080. + data.sample_window = val;
  9081. + else {
  9082. + mutex_unlock(&data.lock);
  9083. + return -EINVAL;
  9084. + }
  9085. + mutex_unlock(&data.lock);
  9086. +
  9087. + return csize;
  9088. +}
  9089. +
  9090. +/*
  9091. + * Function pointers for the "count" debugfs file operations
  9092. + */
  9093. +static const struct file_operations count_fops = {
  9094. + .open = debug_count_fopen,
  9095. + .read = debug_count_fread,
  9096. + .write = debug_count_fwrite,
  9097. + .owner = THIS_MODULE,
  9098. +};
  9099. +
  9100. +/*
  9101. + * Function pointers for the "enable" debugfs file operations
  9102. + */
  9103. +static const struct file_operations enable_fops = {
  9104. + .open = debug_enable_fopen,
  9105. + .read = debug_enable_fread,
  9106. + .write = debug_enable_fwrite,
  9107. + .owner = THIS_MODULE,
  9108. +};
  9109. +
  9110. +/*
  9111. + * Function pointers for the "max" debugfs file operations
  9112. + */
  9113. +static const struct file_operations max_fops = {
  9114. + .open = debug_max_fopen,
  9115. + .read = debug_max_fread,
  9116. + .write = debug_max_fwrite,
  9117. + .owner = THIS_MODULE,
  9118. +};
  9119. +
  9120. +/*
  9121. + * Function pointers for the "sample" debugfs file operations
  9122. + */
  9123. +static const struct file_operations sample_fops = {
  9124. + .open = debug_sample_fopen,
  9125. + .read = debug_sample_fread,
  9126. + .release = debug_sample_release,
  9127. + .owner = THIS_MODULE,
  9128. +};
  9129. +
  9130. +/*
  9131. + * Function pointers for the "threshold" debugfs file operations
  9132. + */
  9133. +static const struct file_operations threshold_fops = {
  9134. + .open = debug_threshold_fopen,
  9135. + .read = debug_threshold_fread,
  9136. + .write = debug_threshold_fwrite,
  9137. + .owner = THIS_MODULE,
  9138. +};
  9139. +
  9140. +/*
  9141. + * Function pointers for the "width" debugfs file operations
  9142. + */
  9143. +static const struct file_operations width_fops = {
  9144. + .open = debug_width_fopen,
  9145. + .read = debug_width_fread,
  9146. + .write = debug_width_fwrite,
  9147. + .owner = THIS_MODULE,
  9148. +};
  9149. +
  9150. +/*
  9151. + * Function pointers for the "window" debugfs file operations
  9152. + */
  9153. +static const struct file_operations window_fops = {
  9154. + .open = debug_window_fopen,
  9155. + .read = debug_window_fread,
  9156. + .write = debug_window_fwrite,
  9157. + .owner = THIS_MODULE,
  9158. +};
  9159. +
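Most of the files above carry a hand-written file_operations even though they just expose a single u64. For comparison, the kernel's DEFINE_SIMPLE_ATTRIBUTE() helper can generate equivalent open/read/write plumbing from a get/set pair; a sketch for the "window" value (hwlat_window_get/set and window_simple_fops are illustrative names, not part of this patch):

    static int hwlat_window_get(void *unused, u64 *val)
    {
            mutex_lock(&data.lock);
            *val = data.sample_window;
            mutex_unlock(&data.lock);
            return 0;
    }

    static int hwlat_window_set(void *unused, u64 val)
    {
            int err = 0;

            mutex_lock(&data.lock);
            if (val > data.sample_width)
                    data.sample_window = val;
            else
                    err = -EINVAL;
            mutex_unlock(&data.lock);
            return err;
    }

    DEFINE_SIMPLE_ATTRIBUTE(window_simple_fops, hwlat_window_get,
                            hwlat_window_set, "%llu\n");

The resulting window_simple_fops could be handed to debugfs_create_file() exactly as window_fops is in init_debugfs() below.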
  9160. +/**
  9161. + * init_debugfs - A function to initialize the debugfs interface files
  9162. + *
  9163. + * This function creates entries in debugfs for "hwlat_detector", including
  9164. + * files to read values from the detector, current samples, and the
  9165. + * maximum sample that has been captured since the hardware latency
  9166. + * detector was started.
  9167. + */
  9168. +static int init_debugfs(void)
  9169. +{
  9170. + int ret = -ENOMEM;
  9171. +
  9172. + debug_dir = debugfs_create_dir(DRVNAME, NULL);
  9173. + if (!debug_dir)
  9174. + goto err_debug_dir;
  9175. +
  9176. + debug_sample = debugfs_create_file("sample", 0444,
  9177. + debug_dir, NULL,
  9178. + &sample_fops);
  9179. + if (!debug_sample)
  9180. + goto err_sample;
  9181. +
  9182. + debug_count = debugfs_create_file("count", 0444,
  9183. + debug_dir, NULL,
  9184. + &count_fops);
  9185. + if (!debug_count)
  9186. + goto err_count;
  9187. +
  9188. + debug_max = debugfs_create_file("max", 0444,
  9189. + debug_dir, NULL,
  9190. + &max_fops);
  9191. + if (!debug_max)
  9192. + goto err_max;
  9193. +
  9194. + debug_sample_window = debugfs_create_file("window", 0644,
  9195. + debug_dir, NULL,
  9196. + &window_fops);
  9197. + if (!debug_sample_window)
  9198. + goto err_window;
  9199. +
  9200. + debug_sample_width = debugfs_create_file("width", 0644,
  9201. + debug_dir, NULL,
  9202. + &width_fops);
  9203. + if (!debug_sample_width)
  9204. + goto err_width;
  9205. +
  9206. + debug_threshold = debugfs_create_file("threshold", 0644,
  9207. + debug_dir, NULL,
  9208. + &threshold_fops);
  9209. + if (!debug_threshold)
  9210. + goto err_threshold;
  9211. +
  9212. + debug_enable = debugfs_create_file("enable", 0644,
  9213. + debug_dir, &enabled,
  9214. + &enable_fops);
  9215. + if (!debug_enable)
  9216. + goto err_enable;
  9217. +
  9218. + else {
  9219. + ret = 0;
  9220. + goto out;
  9221. + }
  9222. +
  9223. +err_enable:
  9224. + debugfs_remove(debug_threshold);
  9225. +err_threshold:
  9226. + debugfs_remove(debug_sample_width);
  9227. +err_width:
  9228. + debugfs_remove(debug_sample_window);
  9229. +err_window:
  9230. + debugfs_remove(debug_max);
  9231. +err_max:
  9232. + debugfs_remove(debug_count);
  9233. +err_count:
  9234. + debugfs_remove(debug_sample);
  9235. +err_sample:
  9236. + debugfs_remove(debug_dir);
  9237. +err_debug_dir:
  9238. +out:
  9239. + return ret;
  9240. +}
  9241. +
  9242. +/**
  9243. + * free_debugfs - A function to cleanup the debugfs file interface
  9244. + */
  9245. +static void free_debugfs(void)
  9246. +{
  9247. + /* could also use a debugfs_remove_recursive */
  9248. + debugfs_remove(debug_enable);
  9249. + debugfs_remove(debug_threshold);
  9250. + debugfs_remove(debug_sample_width);
  9251. + debugfs_remove(debug_sample_window);
  9252. + debugfs_remove(debug_max);
  9253. + debugfs_remove(debug_count);
  9254. + debugfs_remove(debug_sample);
  9255. + debugfs_remove(debug_dir);
  9256. +}
  9257. +
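As the comment notes, the eight debugfs_remove() calls can be collapsed into a single recursive removal of the directory. A sketch of that variant (debugfs_remove_recursive() also tolerates NULL or error pointers, so the behaviour should be equivalent):

    static void free_debugfs(void)
    {
            /* Removes "hwlat_detector" and every file created under it. */
            debugfs_remove_recursive(debug_dir);
    }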
  9258. +/**
  9259. + * detector_init - Standard module initialization code
  9260. + */
  9261. +static int detector_init(void)
  9262. +{
  9263. + int ret = -ENOMEM;
  9264. +
  9265. + pr_info(BANNER "version %s\n", VERSION);
  9266. +
  9267. + ret = init_stats();
  9268. + if (ret)
  9269. + goto out;
  9270. +
  9271. + ret = init_debugfs();
  9272. + if (ret)
  9273. + goto err_stats;
  9274. +
  9275. + if (enabled)
  9276. + ret = start_kthread();
  9277. +
  9278. + goto out;
  9279. +
  9280. +err_stats:
  9281. + ring_buffer_free(ring_buffer);
  9282. +out:
  9283. + return ret;
  9284. +
  9285. +}
  9286. +
  9287. +/**
  9288. + * detector_exit - Standard module cleanup code
  9289. + */
  9290. +static void detector_exit(void)
  9291. +{
  9292. + int err;
  9293. +
  9294. + if (enabled) {
  9295. + enabled = 0;
  9296. + err = stop_kthread();
  9297. + if (err)
  9298. + pr_err(BANNER "cannot stop kthread\n");
  9299. + }
  9300. +
  9301. + free_debugfs();
  9302. + ring_buffer_free(ring_buffer); /* free up the ring buffer */
  9303. +
  9304. +}
  9305. +
  9306. +module_init(detector_init);
  9307. +module_exit(detector_exit);
  9308. diff -Nur linux-4.4.62.orig/drivers/misc/Kconfig linux-4.4.62/drivers/misc/Kconfig
  9309. --- linux-4.4.62.orig/drivers/misc/Kconfig 2017-04-18 07:15:37.000000000 +0200
  9310. +++ linux-4.4.62/drivers/misc/Kconfig 2017-04-18 17:38:08.054643816 +0200
  9311. @@ -54,6 +54,7 @@
  9312. config ATMEL_TCLIB
  9313. bool "Atmel AT32/AT91 Timer/Counter Library"
  9314. depends on (AVR32 || ARCH_AT91)
  9315. + default y if PREEMPT_RT_FULL
  9316. help
  9317. Select this if you want a library to allocate the Timer/Counter
  9318. blocks found on many Atmel processors. This facilitates using
  9319. @@ -69,8 +70,7 @@
  9320. are combined to make a single 32-bit timer.
  9321. When GENERIC_CLOCKEVENTS is defined, the third timer channel
  9322. - may be used as a clock event device supporting oneshot mode
  9323. - (delays of up to two seconds) based on the 32 KiHz clock.
  9324. + may be used as a clock event device supporting oneshot mode.
  9325. config ATMEL_TCB_CLKSRC_BLOCK
  9326. int
  9327. @@ -84,6 +84,15 @@
  9328. TC can be used for other purposes, such as PWM generation and
  9329. interval timing.
  9330. +config ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
  9331. + bool "TC Block use 32 KiHz clock"
  9332. + depends on ATMEL_TCB_CLKSRC
  9333. + default y if !PREEMPT_RT_FULL
  9334. + help
  9335. + Select this to use 32 KiHz base clock rate as TC block clock
  9336. + source for clock events.
  9337. +
  9338. +
  9339. config DUMMY_IRQ
  9340. tristate "Dummy IRQ handler"
  9341. default n
  9342. @@ -113,6 +122,35 @@
  9343. for information on the specific driver level and support statement
  9344. for your IBM server.
  9345. +config HWLAT_DETECTOR
  9346. + tristate "Testing module to detect hardware-induced latencies"
  9347. + depends on DEBUG_FS
  9348. + depends on RING_BUFFER
  9349. + default m
  9350. + ---help---
  9351. + A simple hardware latency detector. Use this module to detect
  9352. + large latencies introduced by the behavior of the underlying
  9353. + system firmware external to Linux. We do this by periodically
  9354. + using stop_machine to grab all available CPUs and measuring
  9355. + for unexplainable gaps in the CPU timestamp counter(s). By
  9356. + default, the module is not enabled until the "enable" file
  9357. + within the "hwlat_detector" debugfs directory is toggled.
  9358. +
  9359. + This module is often used to detect SMI (System Management
  9360. + Interrupts) on x86 systems, though it is not x86-specific. To
  9361. + this end, we default to using a sample window of 1 second,
  9362. + during which we will sample for 0.5 seconds. If an SMI or
  9363. + similar event occurs during that time, it is recorded
  9364. + into a global 8K-sample ring buffer until retrieved.
  9365. +
  9366. + WARNING: This software should never be enabled (it can be built
  9367. + but should not be turned on after it is loaded) in a production
  9368. + environment where high latencies are a concern since the
  9369. + sampling mechanism actually introduces latencies for
  9370. + regular tasks while the CPU(s) are being held.
  9371. +
  9372. + If unsure, say N
  9373. +
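Because the module loads with sampling disabled, actually using it means poking the debugfs files described above. A minimal userspace sketch of turning the detector on, assuming debugfs is mounted at the conventional /sys/kernel/debug:

    #include <fcntl.h>
    #include <unistd.h>

    /* Toggle the hwlat_detector "enable" file; returns 0 on success. */
    static int hwlat_enable(int on)
    {
            const char *path = "/sys/kernel/debug/hwlat_detector/enable";
            char c = on ? '1' : '0';
            int fd = open(path, O_WRONLY);

            if (fd < 0)
                    return -1;
            if (write(fd, &c, 1) != 1) {
                    close(fd);
                    return -1;
            }
            return close(fd);
    }

Captured latencies can then be read back from the "sample" and "max" files in the same directory.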
  9374. config PHANTOM
  9375. tristate "Sensable PHANToM (PCI)"
  9376. depends on PCI
  9377. diff -Nur linux-4.4.62.orig/drivers/misc/Makefile linux-4.4.62/drivers/misc/Makefile
  9378. --- linux-4.4.62.orig/drivers/misc/Makefile 2017-04-18 07:15:37.000000000 +0200
  9379. +++ linux-4.4.62/drivers/misc/Makefile 2017-04-18 17:38:08.054643816 +0200
  9380. @@ -39,6 +39,7 @@
  9381. obj-$(CONFIG_HMC6352) += hmc6352.o
  9382. obj-y += eeprom/
  9383. obj-y += cb710/
  9384. +obj-$(CONFIG_HWLAT_DETECTOR) += hwlat_detector.o
  9385. obj-$(CONFIG_SPEAR13XX_PCIE_GADGET) += spear13xx_pcie_gadget.o
  9386. obj-$(CONFIG_VMWARE_BALLOON) += vmw_balloon.o
  9387. obj-$(CONFIG_ARM_CHARLCD) += arm-charlcd.o
  9388. diff -Nur linux-4.4.62.orig/drivers/mmc/host/mmci.c linux-4.4.62/drivers/mmc/host/mmci.c
  9389. --- linux-4.4.62.orig/drivers/mmc/host/mmci.c 2017-04-18 07:15:37.000000000 +0200
  9390. +++ linux-4.4.62/drivers/mmc/host/mmci.c 2017-04-18 17:38:08.054643816 +0200
  9391. @@ -1155,15 +1155,12 @@
  9392. struct sg_mapping_iter *sg_miter = &host->sg_miter;
  9393. struct variant_data *variant = host->variant;
  9394. void __iomem *base = host->base;
  9395. - unsigned long flags;
  9396. u32 status;
  9397. status = readl(base + MMCISTATUS);
  9398. dev_dbg(mmc_dev(host->mmc), "irq1 (pio) %08x\n", status);
  9399. - local_irq_save(flags);
  9400. -
  9401. do {
  9402. unsigned int remain, len;
  9403. char *buffer;
  9404. @@ -1203,8 +1200,6 @@
  9405. sg_miter_stop(sg_miter);
  9406. - local_irq_restore(flags);
  9407. -
  9408. /*
  9409. * If we have less than the fifo 'half-full' threshold to transfer,
  9410. * trigger a PIO interrupt as soon as any data is available.
  9411. diff -Nur linux-4.4.62.orig/drivers/net/ethernet/3com/3c59x.c linux-4.4.62/drivers/net/ethernet/3com/3c59x.c
  9412. --- linux-4.4.62.orig/drivers/net/ethernet/3com/3c59x.c 2017-04-18 07:15:37.000000000 +0200
  9413. +++ linux-4.4.62/drivers/net/ethernet/3com/3c59x.c 2017-04-18 17:38:08.058643971 +0200
  9414. @@ -842,9 +842,9 @@
  9415. {
  9416. struct vortex_private *vp = netdev_priv(dev);
  9417. unsigned long flags;
  9418. - local_irq_save(flags);
  9419. + local_irq_save_nort(flags);
  9420. (vp->full_bus_master_rx ? boomerang_interrupt:vortex_interrupt)(dev->irq,dev);
  9421. - local_irq_restore(flags);
  9422. + local_irq_restore_nort(flags);
  9423. }
  9424. #endif
  9425. @@ -1916,12 +1916,12 @@
  9426. * Block interrupts because vortex_interrupt does a bare spin_lock()
  9427. */
  9428. unsigned long flags;
  9429. - local_irq_save(flags);
  9430. + local_irq_save_nort(flags);
  9431. if (vp->full_bus_master_tx)
  9432. boomerang_interrupt(dev->irq, dev);
  9433. else
  9434. vortex_interrupt(dev->irq, dev);
  9435. - local_irq_restore(flags);
  9436. + local_irq_restore_nort(flags);
  9437. }
  9438. }
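This is the first of several drivers below that switch local_irq_save()/local_irq_restore() to the _nort variants added elsewhere in this patch. On a non-RT kernel they behave exactly like the plain versions; on PREEMPT_RT_FULL they stop masking interrupts, because the protected section may then take sleeping locks and no longer runs with interrupts genuinely disabled. A rough sketch of the shape such wrappers take (the exact definitions live in the patch's irqflags changes, not in this hunk):

    #ifdef CONFIG_PREEMPT_RT_FULL
    /* On RT the section may sleep, so do not actually mask interrupts. */
    # define local_irq_save_nort(flags)        local_save_flags(flags)
    # define local_irq_restore_nort(flags)     (void)(flags)
    #else
    # define local_irq_save_nort(flags)        local_irq_save(flags)
    # define local_irq_restore_nort(flags)     local_irq_restore(flags)
    #endif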
  9439. diff -Nur linux-4.4.62.orig/drivers/net/ethernet/atheros/atl1c/atl1c_main.c linux-4.4.62/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
  9440. --- linux-4.4.62.orig/drivers/net/ethernet/atheros/atl1c/atl1c_main.c 2017-04-18 07:15:37.000000000 +0200
  9441. +++ linux-4.4.62/drivers/net/ethernet/atheros/atl1c/atl1c_main.c 2017-04-18 17:38:08.058643971 +0200
  9442. @@ -2221,11 +2221,7 @@
  9443. }
  9444. tpd_req = atl1c_cal_tpd_req(skb);
  9445. - if (!spin_trylock_irqsave(&adapter->tx_lock, flags)) {
  9446. - if (netif_msg_pktdata(adapter))
  9447. - dev_info(&adapter->pdev->dev, "tx locked\n");
  9448. - return NETDEV_TX_LOCKED;
  9449. - }
  9450. + spin_lock_irqsave(&adapter->tx_lock, flags);
  9451. if (atl1c_tpd_avail(adapter, type) < tpd_req) {
  9452. /* no enough descriptor, just stop queue */
  9453. diff -Nur linux-4.4.62.orig/drivers/net/ethernet/atheros/atl1e/atl1e_main.c linux-4.4.62/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
  9454. --- linux-4.4.62.orig/drivers/net/ethernet/atheros/atl1e/atl1e_main.c 2017-04-18 07:15:37.000000000 +0200
  9455. +++ linux-4.4.62/drivers/net/ethernet/atheros/atl1e/atl1e_main.c 2017-04-18 17:38:08.058643971 +0200
  9456. @@ -1880,8 +1880,7 @@
  9457. return NETDEV_TX_OK;
  9458. }
  9459. tpd_req = atl1e_cal_tdp_req(skb);
  9460. - if (!spin_trylock_irqsave(&adapter->tx_lock, flags))
  9461. - return NETDEV_TX_LOCKED;
  9462. + spin_lock_irqsave(&adapter->tx_lock, flags);
  9463. if (atl1e_tpd_avail(adapter) < tpd_req) {
  9464. /* no enough descriptor, just stop queue */
  9465. diff -Nur linux-4.4.62.orig/drivers/net/ethernet/chelsio/cxgb/sge.c linux-4.4.62/drivers/net/ethernet/chelsio/cxgb/sge.c
  9466. --- linux-4.4.62.orig/drivers/net/ethernet/chelsio/cxgb/sge.c 2017-04-18 07:15:37.000000000 +0200
  9467. +++ linux-4.4.62/drivers/net/ethernet/chelsio/cxgb/sge.c 2017-04-18 17:38:08.058643971 +0200
  9468. @@ -1664,8 +1664,7 @@
  9469. struct cmdQ *q = &sge->cmdQ[qid];
  9470. unsigned int credits, pidx, genbit, count, use_sched_skb = 0;
  9471. - if (!spin_trylock(&q->lock))
  9472. - return NETDEV_TX_LOCKED;
  9473. + spin_lock(&q->lock);
  9474. reclaim_completed_tx(sge, q);
  9475. diff -Nur linux-4.4.62.orig/drivers/net/ethernet/neterion/s2io.c linux-4.4.62/drivers/net/ethernet/neterion/s2io.c
  9476. --- linux-4.4.62.orig/drivers/net/ethernet/neterion/s2io.c 2017-04-18 07:15:37.000000000 +0200
  9477. +++ linux-4.4.62/drivers/net/ethernet/neterion/s2io.c 2017-04-18 17:38:08.058643971 +0200
  9478. @@ -4084,12 +4084,7 @@
  9479. [skb->priority & (MAX_TX_FIFOS - 1)];
  9480. fifo = &mac_control->fifos[queue];
  9481. - if (do_spin_lock)
  9482. - spin_lock_irqsave(&fifo->tx_lock, flags);
  9483. - else {
  9484. - if (unlikely(!spin_trylock_irqsave(&fifo->tx_lock, flags)))
  9485. - return NETDEV_TX_LOCKED;
  9486. - }
  9487. + spin_lock_irqsave(&fifo->tx_lock, flags);
  9488. if (sp->config.multiq) {
  9489. if (__netif_subqueue_stopped(dev, fifo->fifo_no)) {
  9490. diff -Nur linux-4.4.62.orig/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c linux-4.4.62/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
  9491. --- linux-4.4.62.orig/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c 2017-04-18 07:15:37.000000000 +0200
  9492. +++ linux-4.4.62/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c 2017-04-18 17:38:08.058643971 +0200
  9493. @@ -2137,10 +2137,8 @@
  9494. struct pch_gbe_tx_ring *tx_ring = adapter->tx_ring;
  9495. unsigned long flags;
  9496. - if (!spin_trylock_irqsave(&tx_ring->tx_lock, flags)) {
  9497. - /* Collision - tell upper layer to requeue */
  9498. - return NETDEV_TX_LOCKED;
  9499. - }
  9500. + spin_lock_irqsave(&tx_ring->tx_lock, flags);
  9501. +
  9502. if (unlikely(!PCH_GBE_DESC_UNUSED(tx_ring))) {
  9503. netif_stop_queue(netdev);
  9504. spin_unlock_irqrestore(&tx_ring->tx_lock, flags);
  9505. diff -Nur linux-4.4.62.orig/drivers/net/ethernet/realtek/8139too.c linux-4.4.62/drivers/net/ethernet/realtek/8139too.c
  9506. --- linux-4.4.62.orig/drivers/net/ethernet/realtek/8139too.c 2017-04-18 07:15:37.000000000 +0200
  9507. +++ linux-4.4.62/drivers/net/ethernet/realtek/8139too.c 2017-04-18 17:38:08.058643971 +0200
  9508. @@ -2229,7 +2229,7 @@
  9509. struct rtl8139_private *tp = netdev_priv(dev);
  9510. const int irq = tp->pci_dev->irq;
  9511. - disable_irq(irq);
  9512. + disable_irq_nosync(irq);
  9513. rtl8139_interrupt(irq, dev);
  9514. enable_irq(irq);
  9515. }
  9516. diff -Nur linux-4.4.62.orig/drivers/net/ethernet/tehuti/tehuti.c linux-4.4.62/drivers/net/ethernet/tehuti/tehuti.c
  9517. --- linux-4.4.62.orig/drivers/net/ethernet/tehuti/tehuti.c 2017-04-18 07:15:37.000000000 +0200
  9518. +++ linux-4.4.62/drivers/net/ethernet/tehuti/tehuti.c 2017-04-18 17:38:08.058643971 +0200
  9519. @@ -1629,13 +1629,8 @@
  9520. unsigned long flags;
  9521. ENTER;
  9522. - local_irq_save(flags);
  9523. - if (!spin_trylock(&priv->tx_lock)) {
  9524. - local_irq_restore(flags);
  9525. - DBG("%s[%s]: TX locked, returning NETDEV_TX_LOCKED\n",
  9526. - BDX_DRV_NAME, ndev->name);
  9527. - return NETDEV_TX_LOCKED;
  9528. - }
  9529. +
  9530. + spin_lock_irqsave(&priv->tx_lock, flags);
  9531. /* build tx descriptor */
  9532. BDX_ASSERT(f->m.wptr >= f->m.memsz); /* started with valid wptr */
  9533. diff -Nur linux-4.4.62.orig/drivers/net/rionet.c linux-4.4.62/drivers/net/rionet.c
  9534. --- linux-4.4.62.orig/drivers/net/rionet.c 2017-04-18 07:15:37.000000000 +0200
  9535. +++ linux-4.4.62/drivers/net/rionet.c 2017-04-18 17:38:08.062644126 +0200
  9536. @@ -174,11 +174,7 @@
  9537. unsigned long flags;
  9538. int add_num = 1;
  9539. - local_irq_save(flags);
  9540. - if (!spin_trylock(&rnet->tx_lock)) {
  9541. - local_irq_restore(flags);
  9542. - return NETDEV_TX_LOCKED;
  9543. - }
  9544. + spin_lock_irqsave(&rnet->tx_lock, flags);
  9545. if (is_multicast_ether_addr(eth->h_dest))
  9546. add_num = nets[rnet->mport->id].nact;
  9547. diff -Nur linux-4.4.62.orig/drivers/net/wireless/orinoco/orinoco_usb.c linux-4.4.62/drivers/net/wireless/orinoco/orinoco_usb.c
  9548. --- linux-4.4.62.orig/drivers/net/wireless/orinoco/orinoco_usb.c 2017-04-18 07:15:37.000000000 +0200
  9549. +++ linux-4.4.62/drivers/net/wireless/orinoco/orinoco_usb.c 2017-04-18 17:38:08.062644126 +0200
  9550. @@ -697,7 +697,7 @@
  9551. while (!ctx->done.done && msecs--)
  9552. udelay(1000);
  9553. } else {
  9554. - wait_event_interruptible(ctx->done.wait,
  9555. + swait_event_interruptible(ctx->done.wait,
  9556. ctx->done.done);
  9557. }
  9558. break;
  9559. diff -Nur linux-4.4.62.orig/drivers/pci/access.c linux-4.4.62/drivers/pci/access.c
  9560. --- linux-4.4.62.orig/drivers/pci/access.c 2017-04-18 07:15:37.000000000 +0200
  9561. +++ linux-4.4.62/drivers/pci/access.c 2017-04-18 17:38:08.062644126 +0200
  9562. @@ -561,7 +561,7 @@
  9563. WARN_ON(!dev->block_cfg_access);
  9564. dev->block_cfg_access = 0;
  9565. - wake_up_all(&pci_cfg_wait);
  9566. + wake_up_all_locked(&pci_cfg_wait);
  9567. raw_spin_unlock_irqrestore(&pci_lock, flags);
  9568. }
  9569. EXPORT_SYMBOL_GPL(pci_cfg_access_unlock);
  9570. diff -Nur linux-4.4.62.orig/drivers/pinctrl/qcom/pinctrl-msm.c linux-4.4.62/drivers/pinctrl/qcom/pinctrl-msm.c
  9571. --- linux-4.4.62.orig/drivers/pinctrl/qcom/pinctrl-msm.c 2017-04-18 07:15:37.000000000 +0200
  9572. +++ linux-4.4.62/drivers/pinctrl/qcom/pinctrl-msm.c 2017-04-18 17:38:08.062644126 +0200
  9573. @@ -60,7 +60,7 @@
  9574. struct notifier_block restart_nb;
  9575. int irq;
  9576. - spinlock_t lock;
  9577. + raw_spinlock_t lock;
  9578. DECLARE_BITMAP(dual_edge_irqs, MAX_NR_GPIO);
  9579. DECLARE_BITMAP(enabled_irqs, MAX_NR_GPIO);
  9580. @@ -156,14 +156,14 @@
  9581. if (WARN_ON(i == g->nfuncs))
  9582. return -EINVAL;
  9583. - spin_lock_irqsave(&pctrl->lock, flags);
  9584. + raw_spin_lock_irqsave(&pctrl->lock, flags);
  9585. val = readl(pctrl->regs + g->ctl_reg);
  9586. val &= ~(0x7 << g->mux_bit);
  9587. val |= i << g->mux_bit;
  9588. writel(val, pctrl->regs + g->ctl_reg);
  9589. - spin_unlock_irqrestore(&pctrl->lock, flags);
  9590. + raw_spin_unlock_irqrestore(&pctrl->lock, flags);
  9591. return 0;
  9592. }
  9593. @@ -326,14 +326,14 @@
  9594. break;
  9595. case PIN_CONFIG_OUTPUT:
  9596. /* set output value */
  9597. - spin_lock_irqsave(&pctrl->lock, flags);
  9598. + raw_spin_lock_irqsave(&pctrl->lock, flags);
  9599. val = readl(pctrl->regs + g->io_reg);
  9600. if (arg)
  9601. val |= BIT(g->out_bit);
  9602. else
  9603. val &= ~BIT(g->out_bit);
  9604. writel(val, pctrl->regs + g->io_reg);
  9605. - spin_unlock_irqrestore(&pctrl->lock, flags);
  9606. + raw_spin_unlock_irqrestore(&pctrl->lock, flags);
  9607. /* enable output */
  9608. arg = 1;
  9609. @@ -354,12 +354,12 @@
  9610. return -EINVAL;
  9611. }
  9612. - spin_lock_irqsave(&pctrl->lock, flags);
  9613. + raw_spin_lock_irqsave(&pctrl->lock, flags);
  9614. val = readl(pctrl->regs + g->ctl_reg);
  9615. val &= ~(mask << bit);
  9616. val |= arg << bit;
  9617. writel(val, pctrl->regs + g->ctl_reg);
  9618. - spin_unlock_irqrestore(&pctrl->lock, flags);
  9619. + raw_spin_unlock_irqrestore(&pctrl->lock, flags);
  9620. }
  9621. return 0;
  9622. @@ -387,13 +387,13 @@
  9623. g = &pctrl->soc->groups[offset];
  9624. - spin_lock_irqsave(&pctrl->lock, flags);
  9625. + raw_spin_lock_irqsave(&pctrl->lock, flags);
  9626. val = readl(pctrl->regs + g->ctl_reg);
  9627. val &= ~BIT(g->oe_bit);
  9628. writel(val, pctrl->regs + g->ctl_reg);
  9629. - spin_unlock_irqrestore(&pctrl->lock, flags);
  9630. + raw_spin_unlock_irqrestore(&pctrl->lock, flags);
  9631. return 0;
  9632. }
  9633. @@ -407,7 +407,7 @@
  9634. g = &pctrl->soc->groups[offset];
  9635. - spin_lock_irqsave(&pctrl->lock, flags);
  9636. + raw_spin_lock_irqsave(&pctrl->lock, flags);
  9637. val = readl(pctrl->regs + g->io_reg);
  9638. if (value)
  9639. @@ -420,7 +420,7 @@
  9640. val |= BIT(g->oe_bit);
  9641. writel(val, pctrl->regs + g->ctl_reg);
  9642. - spin_unlock_irqrestore(&pctrl->lock, flags);
  9643. + raw_spin_unlock_irqrestore(&pctrl->lock, flags);
  9644. return 0;
  9645. }
  9646. @@ -446,7 +446,7 @@
  9647. g = &pctrl->soc->groups[offset];
  9648. - spin_lock_irqsave(&pctrl->lock, flags);
  9649. + raw_spin_lock_irqsave(&pctrl->lock, flags);
  9650. val = readl(pctrl->regs + g->io_reg);
  9651. if (value)
  9652. @@ -455,7 +455,7 @@
  9653. val &= ~BIT(g->out_bit);
  9654. writel(val, pctrl->regs + g->io_reg);
  9655. - spin_unlock_irqrestore(&pctrl->lock, flags);
  9656. + raw_spin_unlock_irqrestore(&pctrl->lock, flags);
  9657. }
  9658. #ifdef CONFIG_DEBUG_FS
  9659. @@ -574,7 +574,7 @@
  9660. g = &pctrl->soc->groups[d->hwirq];
  9661. - spin_lock_irqsave(&pctrl->lock, flags);
  9662. + raw_spin_lock_irqsave(&pctrl->lock, flags);
  9663. val = readl(pctrl->regs + g->intr_cfg_reg);
  9664. val &= ~BIT(g->intr_enable_bit);
  9665. @@ -582,7 +582,7 @@
  9666. clear_bit(d->hwirq, pctrl->enabled_irqs);
  9667. - spin_unlock_irqrestore(&pctrl->lock, flags);
  9668. + raw_spin_unlock_irqrestore(&pctrl->lock, flags);
  9669. }
  9670. static void msm_gpio_irq_unmask(struct irq_data *d)
  9671. @@ -595,7 +595,7 @@
  9672. g = &pctrl->soc->groups[d->hwirq];
  9673. - spin_lock_irqsave(&pctrl->lock, flags);
  9674. + raw_spin_lock_irqsave(&pctrl->lock, flags);
  9675. val = readl(pctrl->regs + g->intr_cfg_reg);
  9676. val |= BIT(g->intr_enable_bit);
  9677. @@ -603,7 +603,7 @@
  9678. set_bit(d->hwirq, pctrl->enabled_irqs);
  9679. - spin_unlock_irqrestore(&pctrl->lock, flags);
  9680. + raw_spin_unlock_irqrestore(&pctrl->lock, flags);
  9681. }
  9682. static void msm_gpio_irq_ack(struct irq_data *d)
  9683. @@ -616,7 +616,7 @@
  9684. g = &pctrl->soc->groups[d->hwirq];
  9685. - spin_lock_irqsave(&pctrl->lock, flags);
  9686. + raw_spin_lock_irqsave(&pctrl->lock, flags);
  9687. val = readl(pctrl->regs + g->intr_status_reg);
  9688. if (g->intr_ack_high)
  9689. @@ -628,7 +628,7 @@
  9690. if (test_bit(d->hwirq, pctrl->dual_edge_irqs))
  9691. msm_gpio_update_dual_edge_pos(pctrl, g, d);
  9692. - spin_unlock_irqrestore(&pctrl->lock, flags);
  9693. + raw_spin_unlock_irqrestore(&pctrl->lock, flags);
  9694. }
  9695. static int msm_gpio_irq_set_type(struct irq_data *d, unsigned int type)
  9696. @@ -641,7 +641,7 @@
  9697. g = &pctrl->soc->groups[d->hwirq];
  9698. - spin_lock_irqsave(&pctrl->lock, flags);
  9699. + raw_spin_lock_irqsave(&pctrl->lock, flags);
  9700. /*
  9701. * For hw without possibility of detecting both edges
  9702. @@ -715,7 +715,7 @@
  9703. if (test_bit(d->hwirq, pctrl->dual_edge_irqs))
  9704. msm_gpio_update_dual_edge_pos(pctrl, g, d);
  9705. - spin_unlock_irqrestore(&pctrl->lock, flags);
  9706. + raw_spin_unlock_irqrestore(&pctrl->lock, flags);
  9707. if (type & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH))
  9708. irq_set_handler_locked(d, handle_level_irq);
  9709. @@ -731,11 +731,11 @@
  9710. struct msm_pinctrl *pctrl = to_msm_pinctrl(gc);
  9711. unsigned long flags;
  9712. - spin_lock_irqsave(&pctrl->lock, flags);
  9713. + raw_spin_lock_irqsave(&pctrl->lock, flags);
  9714. irq_set_irq_wake(pctrl->irq, on);
  9715. - spin_unlock_irqrestore(&pctrl->lock, flags);
  9716. + raw_spin_unlock_irqrestore(&pctrl->lock, flags);
  9717. return 0;
  9718. }
  9719. @@ -881,7 +881,7 @@
  9720. pctrl->soc = soc_data;
  9721. pctrl->chip = msm_gpio_template;
  9722. - spin_lock_init(&pctrl->lock);
  9723. + raw_spin_lock_init(&pctrl->lock);
  9724. res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
  9725. pctrl->regs = devm_ioremap_resource(&pdev->dev, res);
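The conversion above follows a standard RT rule: spinlock_t becomes a sleeping lock on PREEMPT_RT_FULL, but these critical sections run from irqchip callbacks (irq_mask, irq_unmask, irq_ack, irq_set_type) where sleeping is forbidden, so the lock must remain a truly spinning raw_spinlock_t. A minimal sketch of the pattern, with hypothetical names:

    #include <linux/io.h>
    #include <linux/spinlock.h>
    #include <linux/types.h>

    struct my_chip {
            raw_spinlock_t lock;    /* init with raw_spin_lock_init() at probe */
            void __iomem *regs;
    };

    /* Register read-modify-write, callable from hard interrupt context. */
    static void my_chip_irq_mask(struct my_chip *chip, u32 bit)
    {
            unsigned long flags;
            u32 val;

            raw_spin_lock_irqsave(&chip->lock, flags);
            val = readl(chip->regs);
            writel(val & ~bit, chip->regs);
            raw_spin_unlock_irqrestore(&chip->lock, flags);
    }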
  9726. diff -Nur linux-4.4.62.orig/drivers/scsi/fcoe/fcoe.c linux-4.4.62/drivers/scsi/fcoe/fcoe.c
  9727. --- linux-4.4.62.orig/drivers/scsi/fcoe/fcoe.c 2017-04-18 07:15:37.000000000 +0200
  9728. +++ linux-4.4.62/drivers/scsi/fcoe/fcoe.c 2017-04-18 17:38:08.062644126 +0200
  9729. @@ -1286,7 +1286,7 @@
  9730. struct sk_buff *skb;
  9731. #ifdef CONFIG_SMP
  9732. struct fcoe_percpu_s *p0;
  9733. - unsigned targ_cpu = get_cpu();
  9734. + unsigned targ_cpu = get_cpu_light();
  9735. #endif /* CONFIG_SMP */
  9736. FCOE_DBG("Destroying receive thread for CPU %d\n", cpu);
  9737. @@ -1342,7 +1342,7 @@
  9738. kfree_skb(skb);
  9739. spin_unlock_bh(&p->fcoe_rx_list.lock);
  9740. }
  9741. - put_cpu();
  9742. + put_cpu_light();
  9743. #else
  9744. /*
  9745. * This a non-SMP scenario where the singular Rx thread is
  9746. @@ -1566,11 +1566,11 @@
  9747. static int fcoe_alloc_paged_crc_eof(struct sk_buff *skb, int tlen)
  9748. {
  9749. struct fcoe_percpu_s *fps;
  9750. - int rc;
  9751. + int rc, cpu = get_cpu_light();
  9752. - fps = &get_cpu_var(fcoe_percpu);
  9753. + fps = &per_cpu(fcoe_percpu, cpu);
  9754. rc = fcoe_get_paged_crc_eof(skb, tlen, fps);
  9755. - put_cpu_var(fcoe_percpu);
  9756. + put_cpu_light();
  9757. return rc;
  9758. }
  9759. @@ -1766,11 +1766,11 @@
  9760. return 0;
  9761. }
  9762. - stats = per_cpu_ptr(lport->stats, get_cpu());
  9763. + stats = per_cpu_ptr(lport->stats, get_cpu_light());
  9764. stats->InvalidCRCCount++;
  9765. if (stats->InvalidCRCCount < 5)
  9766. printk(KERN_WARNING "fcoe: dropping frame with CRC error\n");
  9767. - put_cpu();
  9768. + put_cpu_light();
  9769. return -EINVAL;
  9770. }
  9771. @@ -1814,7 +1814,7 @@
  9772. */
  9773. hp = (struct fcoe_hdr *) skb_network_header(skb);
  9774. - stats = per_cpu_ptr(lport->stats, get_cpu());
  9775. + stats = per_cpu_ptr(lport->stats, get_cpu_light());
  9776. if (unlikely(FC_FCOE_DECAPS_VER(hp) != FC_FCOE_VER)) {
  9777. if (stats->ErrorFrames < 5)
  9778. printk(KERN_WARNING "fcoe: FCoE version "
  9779. @@ -1846,13 +1846,13 @@
  9780. goto drop;
  9781. if (!fcoe_filter_frames(lport, fp)) {
  9782. - put_cpu();
  9783. + put_cpu_light();
  9784. fc_exch_recv(lport, fp);
  9785. return;
  9786. }
  9787. drop:
  9788. stats->ErrorFrames++;
  9789. - put_cpu();
  9790. + put_cpu_light();
  9791. kfree_skb(skb);
  9792. }
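The get_cpu()/put_cpu() pair disables preemption around the per-CPU access, which is too strong here on RT because the protected code goes on to take sleeping locks. The _light variants keep the task pinned to its CPU without disabling preemption on PREEMPT_RT_FULL and fall back to the ordinary helpers otherwise; roughly (the real definitions are part of this patch's core changes, not shown in this hunk):

    #ifdef CONFIG_PREEMPT_RT_FULL
    # define get_cpu_light()     ({ migrate_disable(); smp_processor_id(); })
    # define put_cpu_light()     migrate_enable()
    #else
    # define get_cpu_light()     get_cpu()
    # define put_cpu_light()     put_cpu()
    #endif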
  9793. diff -Nur linux-4.4.62.orig/drivers/scsi/fcoe/fcoe_ctlr.c linux-4.4.62/drivers/scsi/fcoe/fcoe_ctlr.c
  9794. --- linux-4.4.62.orig/drivers/scsi/fcoe/fcoe_ctlr.c 2017-04-18 07:15:37.000000000 +0200
  9795. +++ linux-4.4.62/drivers/scsi/fcoe/fcoe_ctlr.c 2017-04-18 17:38:08.062644126 +0200
  9796. @@ -831,7 +831,7 @@
  9797. INIT_LIST_HEAD(&del_list);
  9798. - stats = per_cpu_ptr(fip->lp->stats, get_cpu());
  9799. + stats = per_cpu_ptr(fip->lp->stats, get_cpu_light());
  9800. list_for_each_entry_safe(fcf, next, &fip->fcfs, list) {
  9801. deadline = fcf->time + fcf->fka_period + fcf->fka_period / 2;
  9802. @@ -867,7 +867,7 @@
  9803. sel_time = fcf->time;
  9804. }
  9805. }
  9806. - put_cpu();
  9807. + put_cpu_light();
  9808. list_for_each_entry_safe(fcf, next, &del_list, list) {
  9809. /* Removes fcf from current list */
  9810. diff -Nur linux-4.4.62.orig/drivers/scsi/libfc/fc_exch.c linux-4.4.62/drivers/scsi/libfc/fc_exch.c
  9811. --- linux-4.4.62.orig/drivers/scsi/libfc/fc_exch.c 2017-04-18 07:15:37.000000000 +0200
  9812. +++ linux-4.4.62/drivers/scsi/libfc/fc_exch.c 2017-04-18 17:38:08.062644126 +0200
  9813. @@ -814,10 +814,10 @@
  9814. }
  9815. memset(ep, 0, sizeof(*ep));
  9816. - cpu = get_cpu();
  9817. + cpu = get_cpu_light();
  9818. pool = per_cpu_ptr(mp->pool, cpu);
  9819. spin_lock_bh(&pool->lock);
  9820. - put_cpu();
  9821. + put_cpu_light();
  9822. /* peek cache of free slot */
  9823. if (pool->left != FC_XID_UNKNOWN) {
  9824. diff -Nur linux-4.4.62.orig/drivers/scsi/libsas/sas_ata.c linux-4.4.62/drivers/scsi/libsas/sas_ata.c
  9825. --- linux-4.4.62.orig/drivers/scsi/libsas/sas_ata.c 2017-04-18 07:15:37.000000000 +0200
  9826. +++ linux-4.4.62/drivers/scsi/libsas/sas_ata.c 2017-04-18 17:38:08.062644126 +0200
  9827. @@ -190,7 +190,7 @@
  9828. /* TODO: audit callers to ensure they are ready for qc_issue to
  9829. * unconditionally re-enable interrupts
  9830. */
  9831. - local_irq_save(flags);
  9832. + local_irq_save_nort(flags);
  9833. spin_unlock(ap->lock);
  9834. /* If the device fell off, no sense in issuing commands */
  9835. @@ -255,7 +255,7 @@
  9836. out:
  9837. spin_lock(ap->lock);
  9838. - local_irq_restore(flags);
  9839. + local_irq_restore_nort(flags);
  9840. return ret;
  9841. }
  9842. diff -Nur linux-4.4.62.orig/drivers/scsi/qla2xxx/qla_inline.h linux-4.4.62/drivers/scsi/qla2xxx/qla_inline.h
  9843. --- linux-4.4.62.orig/drivers/scsi/qla2xxx/qla_inline.h 2017-04-18 07:15:37.000000000 +0200
  9844. +++ linux-4.4.62/drivers/scsi/qla2xxx/qla_inline.h 2017-04-18 17:38:08.062644126 +0200
  9845. @@ -59,12 +59,12 @@
  9846. {
  9847. unsigned long flags;
  9848. struct qla_hw_data *ha = rsp->hw;
  9849. - local_irq_save(flags);
  9850. + local_irq_save_nort(flags);
  9851. if (IS_P3P_TYPE(ha))
  9852. qla82xx_poll(0, rsp);
  9853. else
  9854. ha->isp_ops->intr_handler(0, rsp);
  9855. - local_irq_restore(flags);
  9856. + local_irq_restore_nort(flags);
  9857. }
  9858. static inline uint8_t *
  9859. diff -Nur linux-4.4.62.orig/drivers/thermal/x86_pkg_temp_thermal.c linux-4.4.62/drivers/thermal/x86_pkg_temp_thermal.c
  9860. --- linux-4.4.62.orig/drivers/thermal/x86_pkg_temp_thermal.c 2017-04-18 07:15:37.000000000 +0200
  9861. +++ linux-4.4.62/drivers/thermal/x86_pkg_temp_thermal.c 2017-04-18 17:38:08.062644126 +0200
  9862. @@ -29,6 +29,7 @@
  9863. #include <linux/pm.h>
  9864. #include <linux/thermal.h>
  9865. #include <linux/debugfs.h>
  9866. +#include <linux/swork.h>
  9867. #include <asm/cpu_device_id.h>
  9868. #include <asm/mce.h>
  9869. @@ -352,7 +353,7 @@
  9870. }
  9871. }
  9872. -static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val)
  9873. +static void platform_thermal_notify_work(struct swork_event *event)
  9874. {
  9875. unsigned long flags;
  9876. int cpu = smp_processor_id();
  9877. @@ -369,7 +370,7 @@
  9878. pkg_work_scheduled[phy_id]) {
  9879. disable_pkg_thres_interrupt();
  9880. spin_unlock_irqrestore(&pkg_work_lock, flags);
  9881. - return -EINVAL;
  9882. + return;
  9883. }
  9884. pkg_work_scheduled[phy_id] = 1;
  9885. spin_unlock_irqrestore(&pkg_work_lock, flags);
  9886. @@ -378,9 +379,48 @@
  9887. schedule_delayed_work_on(cpu,
  9888. &per_cpu(pkg_temp_thermal_threshold_work, cpu),
  9889. msecs_to_jiffies(notify_delay_ms));
  9890. +}
  9891. +
  9892. +#ifdef CONFIG_PREEMPT_RT_FULL
  9893. +static struct swork_event notify_work;
  9894. +
  9895. +static int thermal_notify_work_init(void)
  9896. +{
  9897. + int err;
  9898. +
  9899. + err = swork_get();
  9900. + if (err)
  9901. + return err;
  9902. +
  9903. + INIT_SWORK(&notify_work, platform_thermal_notify_work);
  9904. return 0;
  9905. }
  9906. +static void thermal_notify_work_cleanup(void)
  9907. +{
  9908. + swork_put();
  9909. +}
  9910. +
  9911. +static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val)
  9912. +{
  9913. + swork_queue(&notify_work);
  9914. + return 0;
  9915. +}
  9916. +
  9917. +#else /* !CONFIG_PREEMPT_RT_FULL */
  9918. +
  9919. +static int thermal_notify_work_init(void) { return 0; }
  9920. +
  9921. +static void thermal_notify_work_cleanup(void) { }
  9922. +
  9923. +static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val)
  9924. +{
  9925. + platform_thermal_notify_work(NULL);
  9926. +
  9927. + return 0;
  9928. +}
  9929. +#endif /* CONFIG_PREEMPT_RT_FULL */
  9930. +
  9931. static int find_siblings_cpu(int cpu)
  9932. {
  9933. int i;
  9934. @@ -584,6 +624,9 @@
  9935. if (!x86_match_cpu(pkg_temp_thermal_ids))
  9936. return -ENODEV;
  9937. + if (!thermal_notify_work_init())
  9938. + return -ENODEV;
  9939. +
  9940. spin_lock_init(&pkg_work_lock);
  9941. platform_thermal_package_notify =
  9942. pkg_temp_thermal_platform_thermal_notify;
  9943. @@ -608,7 +651,7 @@
  9944. kfree(pkg_work_scheduled);
  9945. platform_thermal_package_notify = NULL;
  9946. platform_thermal_package_rate_control = NULL;
  9947. -
  9948. + thermal_notify_work_cleanup();
  9949. return -ENODEV;
  9950. }
  9951. @@ -633,6 +676,7 @@
  9952. mutex_unlock(&phy_dev_list_mutex);
  9953. platform_thermal_package_notify = NULL;
  9954. platform_thermal_package_rate_control = NULL;
  9955. + thermal_notify_work_cleanup();
  9956. for_each_online_cpu(i)
  9957. cancel_delayed_work_sync(
  9958. &per_cpu(pkg_temp_thermal_threshold_work, i));
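The thermal notify callback is invoked from the interrupt path, so on PREEMPT_RT_FULL it must not take the (now sleeping) locks or schedule work directly; it only queues an swork item, and the real handler runs from the swork kernel thread. The same conversion reappears in fs/aio.c further down. A condensed sketch of the pattern with hypothetical names, using only the swork calls visible in this patch:

    #include <linux/swork.h>

    static struct swork_event my_event;

    /* Runs in the swork kthread: may sleep and take sleeping locks. */
    static void my_event_work(struct swork_event *event)
    {
    }

    static int my_init(void)
    {
            int err = swork_get();  /* bring up the swork infrastructure */

            if (err)
                    return err;
            INIT_SWORK(&my_event, my_event_work);
            return 0;
    }

    /* Safe from atomic context: just queue, the handler runs later. */
    static void my_irq_notify(void)
    {
            swork_queue(&my_event);
    }

    static void my_exit(void)
    {
            swork_put();
    }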
  9959. diff -Nur linux-4.4.62.orig/drivers/tty/serial/8250/8250_core.c linux-4.4.62/drivers/tty/serial/8250/8250_core.c
  9960. --- linux-4.4.62.orig/drivers/tty/serial/8250/8250_core.c 2017-04-18 07:15:37.000000000 +0200
  9961. +++ linux-4.4.62/drivers/tty/serial/8250/8250_core.c 2017-04-18 17:38:08.062644126 +0200
  9962. @@ -58,7 +58,16 @@
  9963. static unsigned int skip_txen_test; /* force skip of txen test at init time */
  9964. -#define PASS_LIMIT 512
  9965. +/*
  9966. + * On -rt we can have more delays, and legitimately
  9967. + * so - so don't drop work spuriously and spam the
  9968. + * syslog:
  9969. + */
  9970. +#ifdef CONFIG_PREEMPT_RT_FULL
  9971. +# define PASS_LIMIT 1000000
  9972. +#else
  9973. +# define PASS_LIMIT 512
  9974. +#endif
  9975. #include <asm/serial.h>
  9976. /*
  9977. diff -Nur linux-4.4.62.orig/drivers/tty/serial/8250/8250_port.c linux-4.4.62/drivers/tty/serial/8250/8250_port.c
  9978. --- linux-4.4.62.orig/drivers/tty/serial/8250/8250_port.c 2017-04-18 07:15:37.000000000 +0200
  9979. +++ linux-4.4.62/drivers/tty/serial/8250/8250_port.c 2017-04-18 17:38:08.062644126 +0200
  9980. @@ -35,6 +35,7 @@
  9981. #include <linux/nmi.h>
  9982. #include <linux/mutex.h>
  9983. #include <linux/slab.h>
  9984. +#include <linux/kdb.h>
  9985. #include <linux/uaccess.h>
  9986. #include <linux/pm_runtime.h>
  9987. @@ -2843,9 +2844,9 @@
  9988. serial8250_rpm_get(up);
  9989. - if (port->sysrq)
  9990. + if (port->sysrq || oops_in_progress)
  9991. locked = 0;
  9992. - else if (oops_in_progress)
  9993. + else if (in_kdb_printk())
  9994. locked = spin_trylock_irqsave(&port->lock, flags);
  9995. else
  9996. spin_lock_irqsave(&port->lock, flags);
  9997. diff -Nur linux-4.4.62.orig/drivers/tty/serial/amba-pl011.c linux-4.4.62/drivers/tty/serial/amba-pl011.c
  9998. --- linux-4.4.62.orig/drivers/tty/serial/amba-pl011.c 2017-04-18 07:15:37.000000000 +0200
  9999. +++ linux-4.4.62/drivers/tty/serial/amba-pl011.c 2017-04-18 17:38:08.062644126 +0200
  10000. @@ -2067,13 +2067,19 @@
  10001. clk_enable(uap->clk);
  10002. - local_irq_save(flags);
  10003. + /*
  10004. + * local_irq_save(flags);
  10005. + *
  10006. + * This local_irq_save() is nonsense. If we come in via sysrq
  10007. + * handling, interrupts are already disabled. Aside from that,
  10008. + * the port.sysrq check is racy on SMP regardless.
  10009. + */
  10010. if (uap->port.sysrq)
  10011. locked = 0;
  10012. else if (oops_in_progress)
  10013. - locked = spin_trylock(&uap->port.lock);
  10014. + locked = spin_trylock_irqsave(&uap->port.lock, flags);
  10015. else
  10016. - spin_lock(&uap->port.lock);
  10017. + spin_lock_irqsave(&uap->port.lock, flags);
  10018. /*
  10019. * First save the CR then disable the interrupts
  10020. @@ -2098,8 +2104,7 @@
  10021. writew(old_cr, uap->port.membase + UART011_CR);
  10022. if (locked)
  10023. - spin_unlock(&uap->port.lock);
  10024. - local_irq_restore(flags);
  10025. + spin_unlock_irqrestore(&uap->port.lock, flags);
  10026. clk_disable(uap->clk);
  10027. }
  10028. diff -Nur linux-4.4.62.orig/drivers/tty/serial/omap-serial.c linux-4.4.62/drivers/tty/serial/omap-serial.c
  10029. --- linux-4.4.62.orig/drivers/tty/serial/omap-serial.c 2017-04-18 07:15:37.000000000 +0200
  10030. +++ linux-4.4.62/drivers/tty/serial/omap-serial.c 2017-04-18 17:38:08.062644126 +0200
  10031. @@ -1257,13 +1257,10 @@
  10032. pm_runtime_get_sync(up->dev);
  10033. - local_irq_save(flags);
  10034. - if (up->port.sysrq)
  10035. - locked = 0;
  10036. - else if (oops_in_progress)
  10037. - locked = spin_trylock(&up->port.lock);
  10038. + if (up->port.sysrq || oops_in_progress)
  10039. + locked = spin_trylock_irqsave(&up->port.lock, flags);
  10040. else
  10041. - spin_lock(&up->port.lock);
  10042. + spin_lock_irqsave(&up->port.lock, flags);
  10043. /*
  10044. * First save the IER then disable the interrupts
  10045. @@ -1292,8 +1289,7 @@
  10046. pm_runtime_mark_last_busy(up->dev);
  10047. pm_runtime_put_autosuspend(up->dev);
  10048. if (locked)
  10049. - spin_unlock(&up->port.lock);
  10050. - local_irq_restore(flags);
  10051. + spin_unlock_irqrestore(&up->port.lock, flags);
  10052. }
  10053. static int __init
  10054. diff -Nur linux-4.4.62.orig/drivers/usb/core/hcd.c linux-4.4.62/drivers/usb/core/hcd.c
  10055. --- linux-4.4.62.orig/drivers/usb/core/hcd.c 2017-04-18 07:15:37.000000000 +0200
  10056. +++ linux-4.4.62/drivers/usb/core/hcd.c 2017-04-18 17:38:08.066644281 +0200
  10057. @@ -1738,9 +1738,9 @@
  10058. * and no one may trigger the above deadlock situation when
  10059. * running complete() in tasklet.
  10060. */
  10061. - local_irq_save(flags);
  10062. + local_irq_save_nort(flags);
  10063. urb->complete(urb);
  10064. - local_irq_restore(flags);
  10065. + local_irq_restore_nort(flags);
  10066. usb_anchor_resume_wakeups(anchor);
  10067. atomic_dec(&urb->use_count);
  10068. diff -Nur linux-4.4.62.orig/drivers/usb/gadget/function/f_fs.c linux-4.4.62/drivers/usb/gadget/function/f_fs.c
  10069. --- linux-4.4.62.orig/drivers/usb/gadget/function/f_fs.c 2017-04-18 07:15:37.000000000 +0200
  10070. +++ linux-4.4.62/drivers/usb/gadget/function/f_fs.c 2017-04-18 17:38:08.066644281 +0200
  10071. @@ -1404,7 +1404,7 @@
  10072. pr_info("%s(): freeing\n", __func__);
  10073. ffs_data_clear(ffs);
  10074. BUG_ON(waitqueue_active(&ffs->ev.waitq) ||
  10075. - waitqueue_active(&ffs->ep0req_completion.wait));
  10076. + swait_active(&ffs->ep0req_completion.wait));
  10077. kfree(ffs->dev_name);
  10078. kfree(ffs);
  10079. }
  10080. diff -Nur linux-4.4.62.orig/drivers/usb/gadget/legacy/inode.c linux-4.4.62/drivers/usb/gadget/legacy/inode.c
  10081. --- linux-4.4.62.orig/drivers/usb/gadget/legacy/inode.c 2017-04-18 07:15:37.000000000 +0200
  10082. +++ linux-4.4.62/drivers/usb/gadget/legacy/inode.c 2017-04-18 17:38:08.066644281 +0200
  10083. @@ -345,7 +345,7 @@
  10084. spin_unlock_irq (&epdata->dev->lock);
  10085. if (likely (value == 0)) {
  10086. - value = wait_event_interruptible (done.wait, done.done);
  10087. + value = swait_event_interruptible (done.wait, done.done);
  10088. if (value != 0) {
  10089. spin_lock_irq (&epdata->dev->lock);
  10090. if (likely (epdata->ep != NULL)) {
  10091. @@ -354,7 +354,7 @@
  10092. usb_ep_dequeue (epdata->ep, epdata->req);
  10093. spin_unlock_irq (&epdata->dev->lock);
  10094. - wait_event (done.wait, done.done);
  10095. + swait_event (done.wait, done.done);
  10096. if (epdata->status == -ECONNRESET)
  10097. epdata->status = -EINTR;
  10098. } else {
  10099. diff -Nur linux-4.4.62.orig/drivers/usb/gadget/udc/atmel_usba_udc.c linux-4.4.62/drivers/usb/gadget/udc/atmel_usba_udc.c
  10100. --- linux-4.4.62.orig/drivers/usb/gadget/udc/atmel_usba_udc.c 2017-04-18 07:15:37.000000000 +0200
  10101. +++ linux-4.4.62/drivers/usb/gadget/udc/atmel_usba_udc.c 2017-04-18 17:38:08.066644281 +0200
  10102. @@ -17,7 +17,9 @@
  10103. #include <linux/device.h>
  10104. #include <linux/dma-mapping.h>
  10105. #include <linux/list.h>
  10106. +#include <linux/mfd/syscon.h>
  10107. #include <linux/platform_device.h>
  10108. +#include <linux/regmap.h>
  10109. #include <linux/usb/ch9.h>
  10110. #include <linux/usb/gadget.h>
  10111. #include <linux/usb/atmel_usba_udc.h>
  10112. @@ -1888,20 +1890,15 @@
  10113. #ifdef CONFIG_OF
  10114. static void at91sam9rl_toggle_bias(struct usba_udc *udc, int is_on)
  10115. {
  10116. - unsigned int uckr = at91_pmc_read(AT91_CKGR_UCKR);
  10117. -
  10118. - if (is_on)
  10119. - at91_pmc_write(AT91_CKGR_UCKR, uckr | AT91_PMC_BIASEN);
  10120. - else
  10121. - at91_pmc_write(AT91_CKGR_UCKR, uckr & ~(AT91_PMC_BIASEN));
  10122. + regmap_update_bits(udc->pmc, AT91_CKGR_UCKR, AT91_PMC_BIASEN,
  10123. + is_on ? AT91_PMC_BIASEN : 0);
  10124. }
  10125. static void at91sam9g45_pulse_bias(struct usba_udc *udc)
  10126. {
  10127. - unsigned int uckr = at91_pmc_read(AT91_CKGR_UCKR);
  10128. -
  10129. - at91_pmc_write(AT91_CKGR_UCKR, uckr & ~(AT91_PMC_BIASEN));
  10130. - at91_pmc_write(AT91_CKGR_UCKR, uckr | AT91_PMC_BIASEN);
  10131. + regmap_update_bits(udc->pmc, AT91_CKGR_UCKR, AT91_PMC_BIASEN, 0);
  10132. + regmap_update_bits(udc->pmc, AT91_CKGR_UCKR, AT91_PMC_BIASEN,
  10133. + AT91_PMC_BIASEN);
  10134. }
  10135. static const struct usba_udc_errata at91sam9rl_errata = {
  10136. @@ -1938,6 +1935,9 @@
  10137. return ERR_PTR(-EINVAL);
  10138. udc->errata = match->data;
  10139. + udc->pmc = syscon_regmap_lookup_by_compatible("atmel,at91sam9g45-pmc");
  10140. + if (udc->errata && IS_ERR(udc->pmc))
  10141. + return ERR_CAST(udc->pmc);
  10142. udc->num_ep = 0;
  10143. diff -Nur linux-4.4.62.orig/drivers/usb/gadget/udc/atmel_usba_udc.h linux-4.4.62/drivers/usb/gadget/udc/atmel_usba_udc.h
  10144. --- linux-4.4.62.orig/drivers/usb/gadget/udc/atmel_usba_udc.h 2017-04-18 07:15:37.000000000 +0200
  10145. +++ linux-4.4.62/drivers/usb/gadget/udc/atmel_usba_udc.h 2017-04-18 17:38:08.066644281 +0200
  10146. @@ -354,6 +354,8 @@
  10147. struct dentry *debugfs_root;
  10148. struct dentry *debugfs_regs;
  10149. #endif
  10150. +
  10151. + struct regmap *pmc;
  10152. };
  10153. static inline struct usba_ep *to_usba_ep(struct usb_ep *ep)
  10154. diff -Nur linux-4.4.62.orig/fs/aio.c linux-4.4.62/fs/aio.c
  10155. --- linux-4.4.62.orig/fs/aio.c 2017-04-18 07:15:37.000000000 +0200
  10156. +++ linux-4.4.62/fs/aio.c 2017-04-18 17:38:08.066644281 +0200
  10157. @@ -40,6 +40,7 @@
  10158. #include <linux/ramfs.h>
  10159. #include <linux/percpu-refcount.h>
  10160. #include <linux/mount.h>
  10161. +#include <linux/swork.h>
  10162. #include <asm/kmap_types.h>
  10163. #include <asm/uaccess.h>
  10164. @@ -115,7 +116,7 @@
  10165. struct page **ring_pages;
  10166. long nr_pages;
  10167. - struct work_struct free_work;
  10168. + struct swork_event free_work;
  10169. /*
  10170. * signals when all in-flight requests are done
  10171. @@ -258,6 +259,7 @@
  10172. .mount = aio_mount,
  10173. .kill_sb = kill_anon_super,
  10174. };
  10175. + BUG_ON(swork_get());
  10176. aio_mnt = kern_mount(&aio_fs);
  10177. if (IS_ERR(aio_mnt))
  10178. panic("Failed to create aio fs mount.");
  10179. @@ -573,9 +575,9 @@
  10180. return cancel(&kiocb->common);
  10181. }
  10182. -static void free_ioctx(struct work_struct *work)
  10183. +static void free_ioctx(struct swork_event *sev)
  10184. {
  10185. - struct kioctx *ctx = container_of(work, struct kioctx, free_work);
  10186. + struct kioctx *ctx = container_of(sev, struct kioctx, free_work);
  10187. pr_debug("freeing %p\n", ctx);
  10188. @@ -594,8 +596,8 @@
  10189. if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count))
  10190. complete(&ctx->rq_wait->comp);
  10191. - INIT_WORK(&ctx->free_work, free_ioctx);
  10192. - schedule_work(&ctx->free_work);
  10193. + INIT_SWORK(&ctx->free_work, free_ioctx);
  10194. + swork_queue(&ctx->free_work);
  10195. }
  10196. /*
  10197. @@ -603,9 +605,9 @@
  10198. * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted -
  10199. * now it's safe to cancel any that need to be.
  10200. */
  10201. -static void free_ioctx_users(struct percpu_ref *ref)
  10202. +static void free_ioctx_users_work(struct swork_event *sev)
  10203. {
  10204. - struct kioctx *ctx = container_of(ref, struct kioctx, users);
  10205. + struct kioctx *ctx = container_of(sev, struct kioctx, free_work);
  10206. struct aio_kiocb *req;
  10207. spin_lock_irq(&ctx->ctx_lock);
  10208. @@ -624,6 +626,14 @@
  10209. percpu_ref_put(&ctx->reqs);
  10210. }
  10211. +static void free_ioctx_users(struct percpu_ref *ref)
  10212. +{
  10213. + struct kioctx *ctx = container_of(ref, struct kioctx, users);
  10214. +
  10215. + INIT_SWORK(&ctx->free_work, free_ioctx_users_work);
  10216. + swork_queue(&ctx->free_work);
  10217. +}
  10218. +
  10219. static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
  10220. {
  10221. unsigned i, new_nr;
  10222. diff -Nur linux-4.4.62.orig/fs/autofs4/autofs_i.h linux-4.4.62/fs/autofs4/autofs_i.h
  10223. --- linux-4.4.62.orig/fs/autofs4/autofs_i.h 2017-04-18 07:15:37.000000000 +0200
  10224. +++ linux-4.4.62/fs/autofs4/autofs_i.h 2017-04-18 17:38:08.066644281 +0200
  10225. @@ -34,6 +34,7 @@
  10226. #include <linux/sched.h>
  10227. #include <linux/mount.h>
  10228. #include <linux/namei.h>
  10229. +#include <linux/delay.h>
  10230. #include <asm/current.h>
  10231. #include <asm/uaccess.h>
  10232. diff -Nur linux-4.4.62.orig/fs/autofs4/expire.c linux-4.4.62/fs/autofs4/expire.c
  10233. --- linux-4.4.62.orig/fs/autofs4/expire.c 2017-04-18 07:15:37.000000000 +0200
  10234. +++ linux-4.4.62/fs/autofs4/expire.c 2017-04-18 17:38:08.066644281 +0200
  10235. @@ -150,7 +150,7 @@
  10236. parent = p->d_parent;
  10237. if (!spin_trylock(&parent->d_lock)) {
  10238. spin_unlock(&p->d_lock);
  10239. - cpu_relax();
  10240. + cpu_chill();
  10241. goto relock;
  10242. }
  10243. spin_unlock(&p->d_lock);
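cpu_relax() in a trylock retry loop is fine when the lock holder runs on another CPU, but on PREEMPT_RT_FULL the holder may be a preempted task on this very CPU, and a higher-priority spinner would then starve it forever. cpu_chill(), used here and again in the dcache, namespace and timerfd changes below, instead sleeps briefly so the holder can make progress. A sketch of the retry shape with a hypothetical lock pair (the include of linux/delay.h added above is what declares cpu_chill() in this patch):

    #include <linux/delay.h>
    #include <linux/spinlock.h>

    /* Take two locks in a fixed order, backing off politely on contention. */
    static void lock_pair(spinlock_t *a, spinlock_t *b)
    {
            for (;;) {
                    spin_lock(a);
                    if (spin_trylock(b))
                            return;         /* both locks held */
                    spin_unlock(a);
                    /* Sleeps briefly on RT; plain kernels spin via cpu_relax(). */
                    cpu_chill();
            }
    }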
  10244. diff -Nur linux-4.4.62.orig/fs/buffer.c linux-4.4.62/fs/buffer.c
  10245. --- linux-4.4.62.orig/fs/buffer.c 2017-04-18 07:15:37.000000000 +0200
  10246. +++ linux-4.4.62/fs/buffer.c 2017-04-18 17:38:08.066644281 +0200
  10247. @@ -305,8 +305,7 @@
  10248. * decide that the page is now completely done.
  10249. */
  10250. first = page_buffers(page);
  10251. - local_irq_save(flags);
  10252. - bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
  10253. + flags = bh_uptodate_lock_irqsave(first);
  10254. clear_buffer_async_read(bh);
  10255. unlock_buffer(bh);
  10256. tmp = bh;
  10257. @@ -319,8 +318,7 @@
  10258. }
  10259. tmp = tmp->b_this_page;
  10260. } while (tmp != bh);
  10261. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  10262. - local_irq_restore(flags);
  10263. + bh_uptodate_unlock_irqrestore(first, flags);
  10264. /*
  10265. * If none of the buffers had errors and they are all
  10266. @@ -332,9 +330,7 @@
  10267. return;
  10268. still_busy:
  10269. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  10270. - local_irq_restore(flags);
  10271. - return;
  10272. + bh_uptodate_unlock_irqrestore(first, flags);
  10273. }
  10274. /*
  10275. @@ -362,8 +358,7 @@
  10276. }
  10277. first = page_buffers(page);
  10278. - local_irq_save(flags);
  10279. - bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
  10280. + flags = bh_uptodate_lock_irqsave(first);
  10281. clear_buffer_async_write(bh);
  10282. unlock_buffer(bh);
  10283. @@ -375,15 +370,12 @@
  10284. }
  10285. tmp = tmp->b_this_page;
  10286. }
  10287. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  10288. - local_irq_restore(flags);
  10289. + bh_uptodate_unlock_irqrestore(first, flags);
  10290. end_page_writeback(page);
  10291. return;
  10292. still_busy:
  10293. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  10294. - local_irq_restore(flags);
  10295. - return;
  10296. + bh_uptodate_unlock_irqrestore(first, flags);
  10297. }
  10298. EXPORT_SYMBOL(end_buffer_async_write);
  10299. @@ -3325,6 +3317,7 @@
  10300. struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
  10301. if (ret) {
  10302. INIT_LIST_HEAD(&ret->b_assoc_buffers);
  10303. + buffer_head_init_locks(ret);
  10304. preempt_disable();
  10305. __this_cpu_inc(bh_accounting.nr);
  10306. recalc_bh_state();
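The open-coded local_irq_save() plus bit_spin_lock(BH_Uptodate_Lock) pairs are folded into bh_uptodate_lock_irqsave()/bh_uptodate_unlock_irqrestore(), and buffer_head_init_locks() in alloc_buffer_head() initializes whatever state those helpers need; fs/ntfs/aops.c below gets the same treatment. The point is that a bit spinlock taken with interrupts off cannot become a sleeping lock, so on RT the helper can use a real per-buffer_head spinlock instead. A rough sketch of what such a helper pair can look like; the actual definitions belong to this patch's buffer_head.h changes (not shown here) and the b_uptodate_lock field name is assumed for this sketch:

    static inline unsigned long bh_uptodate_lock_irqsave(struct buffer_head *bh)
    {
            unsigned long flags;

    #ifndef CONFIG_PREEMPT_RT_BASE
            local_irq_save(flags);
            bit_spin_lock(BH_Uptodate_Lock, &bh->b_state);
    #else
            spin_lock_irqsave(&bh->b_uptodate_lock, flags);
    #endif
            return flags;
    }

    static inline void bh_uptodate_unlock_irqrestore(struct buffer_head *bh,
                                                     unsigned long flags)
    {
    #ifndef CONFIG_PREEMPT_RT_BASE
            bit_spin_unlock(BH_Uptodate_Lock, &bh->b_state);
            local_irq_restore(flags);
    #else
            spin_unlock_irqrestore(&bh->b_uptodate_lock, flags);
    #endif
    }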
  10307. diff -Nur linux-4.4.62.orig/fs/dcache.c linux-4.4.62/fs/dcache.c
  10308. --- linux-4.4.62.orig/fs/dcache.c 2017-04-18 07:15:37.000000000 +0200
  10309. +++ linux-4.4.62/fs/dcache.c 2017-04-18 17:38:08.066644281 +0200
  10310. @@ -19,6 +19,7 @@
  10311. #include <linux/mm.h>
  10312. #include <linux/fs.h>
  10313. #include <linux/fsnotify.h>
  10314. +#include <linux/delay.h>
  10315. #include <linux/slab.h>
  10316. #include <linux/init.h>
  10317. #include <linux/hash.h>
  10318. @@ -747,6 +748,8 @@
  10319. */
  10320. void dput(struct dentry *dentry)
  10321. {
  10322. + struct dentry *parent;
  10323. +
  10324. if (unlikely(!dentry))
  10325. return;
  10326. @@ -783,9 +786,18 @@
  10327. return;
  10328. kill_it:
  10329. - dentry = dentry_kill(dentry);
  10330. - if (dentry) {
  10331. - cond_resched();
  10332. + parent = dentry_kill(dentry);
  10333. + if (parent) {
  10334. + int r;
  10335. +
  10336. + if (parent == dentry) {
  10337. + /* the task with the highest priority won't schedule */
  10338. + r = cond_resched();
  10339. + if (!r)
  10340. + cpu_chill();
  10341. + } else {
  10342. + dentry = parent;
  10343. + }
  10344. goto repeat;
  10345. }
  10346. }
  10347. @@ -2397,7 +2409,7 @@
  10348. if (dentry->d_lockref.count == 1) {
  10349. if (!spin_trylock(&inode->i_lock)) {
  10350. spin_unlock(&dentry->d_lock);
  10351. - cpu_relax();
  10352. + cpu_chill();
  10353. goto again;
  10354. }
  10355. dentry->d_flags &= ~DCACHE_CANT_MOUNT;
  10356. diff -Nur linux-4.4.62.orig/fs/eventpoll.c linux-4.4.62/fs/eventpoll.c
  10357. --- linux-4.4.62.orig/fs/eventpoll.c 2017-04-18 07:15:37.000000000 +0200
  10358. +++ linux-4.4.62/fs/eventpoll.c 2017-04-18 17:38:08.066644281 +0200
  10359. @@ -505,12 +505,12 @@
  10360. */
  10361. static void ep_poll_safewake(wait_queue_head_t *wq)
  10362. {
  10363. - int this_cpu = get_cpu();
  10364. + int this_cpu = get_cpu_light();
  10365. ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS,
  10366. ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu);
  10367. - put_cpu();
  10368. + put_cpu_light();
  10369. }
  10370. static void ep_remove_wait_queue(struct eppoll_entry *pwq)
  10371. diff -Nur linux-4.4.62.orig/fs/exec.c linux-4.4.62/fs/exec.c
  10372. --- linux-4.4.62.orig/fs/exec.c 2017-04-18 07:15:37.000000000 +0200
  10373. +++ linux-4.4.62/fs/exec.c 2017-04-18 17:38:08.066644281 +0200
  10374. @@ -866,12 +866,14 @@
  10375. }
  10376. }
  10377. task_lock(tsk);
  10378. + preempt_disable_rt();
  10379. active_mm = tsk->active_mm;
  10380. tsk->mm = mm;
  10381. tsk->active_mm = mm;
  10382. activate_mm(active_mm, mm);
  10383. tsk->mm->vmacache_seqnum = 0;
  10384. vmacache_flush(tsk);
  10385. + preempt_enable_rt();
  10386. task_unlock(tsk);
  10387. if (old_mm) {
  10388. up_read(&old_mm->mmap_sem);
  10389. diff -Nur linux-4.4.62.orig/fs/f2fs/f2fs.h linux-4.4.62/fs/f2fs/f2fs.h
  10390. --- linux-4.4.62.orig/fs/f2fs/f2fs.h 2017-04-18 07:15:37.000000000 +0200
  10391. +++ linux-4.4.62/fs/f2fs/f2fs.h 2017-04-18 17:38:08.086645056 +0200
  10392. @@ -24,7 +24,6 @@
  10393. #ifdef CONFIG_F2FS_CHECK_FS
  10394. #define f2fs_bug_on(sbi, condition) BUG_ON(condition)
  10395. -#define f2fs_down_write(x, y) down_write_nest_lock(x, y)
  10396. #else
  10397. #define f2fs_bug_on(sbi, condition) \
  10398. do { \
  10399. @@ -33,7 +32,6 @@
  10400. set_sbi_flag(sbi, SBI_NEED_FSCK); \
  10401. } \
  10402. } while (0)
  10403. -#define f2fs_down_write(x, y) down_write(x)
  10404. #endif
  10405. /*
  10406. @@ -959,7 +957,7 @@
  10407. static inline void f2fs_lock_all(struct f2fs_sb_info *sbi)
  10408. {
  10409. - f2fs_down_write(&sbi->cp_rwsem, &sbi->cp_mutex);
  10410. + down_write(&sbi->cp_rwsem);
  10411. }
  10412. static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi)
  10413. diff -Nur linux-4.4.62.orig/fs/jbd2/checkpoint.c linux-4.4.62/fs/jbd2/checkpoint.c
  10414. --- linux-4.4.62.orig/fs/jbd2/checkpoint.c 2017-04-18 07:15:37.000000000 +0200
  10415. +++ linux-4.4.62/fs/jbd2/checkpoint.c 2017-04-18 17:38:08.106645833 +0200
  10416. @@ -116,6 +116,8 @@
  10417. nblocks = jbd2_space_needed(journal);
  10418. while (jbd2_log_space_left(journal) < nblocks) {
  10419. write_unlock(&journal->j_state_lock);
  10420. + if (current->plug)
  10421. + io_schedule();
  10422. mutex_lock(&journal->j_checkpoint_mutex);
  10423. /*
  10424. diff -Nur linux-4.4.62.orig/fs/namespace.c linux-4.4.62/fs/namespace.c
  10425. --- linux-4.4.62.orig/fs/namespace.c 2017-04-18 07:15:37.000000000 +0200
  10426. +++ linux-4.4.62/fs/namespace.c 2017-04-18 17:38:08.122646453 +0200
  10427. @@ -14,6 +14,7 @@
  10428. #include <linux/mnt_namespace.h>
  10429. #include <linux/user_namespace.h>
  10430. #include <linux/namei.h>
  10431. +#include <linux/delay.h>
  10432. #include <linux/security.h>
  10433. #include <linux/idr.h>
  10434. #include <linux/init.h> /* init_rootfs */
  10435. @@ -353,8 +354,11 @@
  10436. * incremented count after it has set MNT_WRITE_HOLD.
  10437. */
  10438. smp_mb();
  10439. - while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD)
  10440. - cpu_relax();
  10441. + while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) {
  10442. + preempt_enable();
  10443. + cpu_chill();
  10444. + preempt_disable();
  10445. + }
  10446. /*
  10447. * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
  10448. * be set to match its requirements. So we must not load that until
  10449. diff -Nur linux-4.4.62.orig/fs/ntfs/aops.c linux-4.4.62/fs/ntfs/aops.c
  10450. --- linux-4.4.62.orig/fs/ntfs/aops.c 2017-04-18 07:15:37.000000000 +0200
  10451. +++ linux-4.4.62/fs/ntfs/aops.c 2017-04-18 17:38:08.122646453 +0200
  10452. @@ -107,8 +107,7 @@
  10453. "0x%llx.", (unsigned long long)bh->b_blocknr);
  10454. }
  10455. first = page_buffers(page);
  10456. - local_irq_save(flags);
  10457. - bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
  10458. + flags = bh_uptodate_lock_irqsave(first);
  10459. clear_buffer_async_read(bh);
  10460. unlock_buffer(bh);
  10461. tmp = bh;
  10462. @@ -123,8 +122,7 @@
  10463. }
  10464. tmp = tmp->b_this_page;
  10465. } while (tmp != bh);
  10466. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  10467. - local_irq_restore(flags);
  10468. + bh_uptodate_unlock_irqrestore(first, flags);
  10469. /*
  10470. * If none of the buffers had errors then we can set the page uptodate,
  10471. * but we first have to perform the post read mst fixups, if the
  10472. @@ -145,13 +143,13 @@
  10473. recs = PAGE_CACHE_SIZE / rec_size;
  10474. /* Should have been verified before we got here... */
  10475. BUG_ON(!recs);
  10476. - local_irq_save(flags);
  10477. + local_irq_save_nort(flags);
  10478. kaddr = kmap_atomic(page);
  10479. for (i = 0; i < recs; i++)
  10480. post_read_mst_fixup((NTFS_RECORD*)(kaddr +
  10481. i * rec_size), rec_size);
  10482. kunmap_atomic(kaddr);
  10483. - local_irq_restore(flags);
  10484. + local_irq_restore_nort(flags);
  10485. flush_dcache_page(page);
  10486. if (likely(page_uptodate && !PageError(page)))
  10487. SetPageUptodate(page);
  10488. @@ -159,9 +157,7 @@
  10489. unlock_page(page);
  10490. return;
  10491. still_busy:
  10492. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  10493. - local_irq_restore(flags);
  10494. - return;
  10495. + bh_uptodate_unlock_irqrestore(first, flags);
  10496. }
  10497. /**
  10498. diff -Nur linux-4.4.62.orig/fs/timerfd.c linux-4.4.62/fs/timerfd.c
  10499. --- linux-4.4.62.orig/fs/timerfd.c 2017-04-18 07:15:37.000000000 +0200
  10500. +++ linux-4.4.62/fs/timerfd.c 2017-04-18 17:38:08.122646453 +0200
  10501. @@ -450,7 +450,10 @@
  10502. break;
  10503. }
  10504. spin_unlock_irq(&ctx->wqh.lock);
  10505. - cpu_relax();
  10506. + if (isalarm(ctx))
  10507. + hrtimer_wait_for_timer(&ctx->t.alarm.timer);
  10508. + else
  10509. + hrtimer_wait_for_timer(&ctx->t.tmr);
  10510. }
  10511. /*
  10512. diff -Nur linux-4.4.62.orig/include/acpi/platform/aclinux.h linux-4.4.62/include/acpi/platform/aclinux.h
  10513. --- linux-4.4.62.orig/include/acpi/platform/aclinux.h 2017-04-18 07:15:37.000000000 +0200
  10514. +++ linux-4.4.62/include/acpi/platform/aclinux.h 2017-04-18 17:38:08.122646453 +0200
  10515. @@ -127,6 +127,7 @@
  10516. #define acpi_cache_t struct kmem_cache
  10517. #define acpi_spinlock spinlock_t *
  10518. +#define acpi_raw_spinlock raw_spinlock_t *
  10519. #define acpi_cpu_flags unsigned long
  10520. /* Use native linux version of acpi_os_allocate_zeroed */
  10521. @@ -145,6 +146,20 @@
  10522. #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_get_thread_id
  10523. #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_create_lock
  10524. +#define acpi_os_create_raw_lock(__handle) \
  10525. +({ \
  10526. + raw_spinlock_t *lock = ACPI_ALLOCATE(sizeof(*lock)); \
  10527. + \
  10528. + if (lock) { \
  10529. + *(__handle) = lock; \
  10530. + raw_spin_lock_init(*(__handle)); \
  10531. + } \
  10532. + lock ? AE_OK : AE_NO_MEMORY; \
  10533. + })
  10534. +
  10535. +#define acpi_os_delete_raw_lock(__handle) kfree(__handle)
  10536. +
  10537. +
  10538. /*
  10539. * OSL interfaces used by debugger/disassembler
  10540. */
  10541. diff -Nur linux-4.4.62.orig/include/asm-generic/bug.h linux-4.4.62/include/asm-generic/bug.h
  10542. --- linux-4.4.62.orig/include/asm-generic/bug.h 2017-04-18 07:15:37.000000000 +0200
  10543. +++ linux-4.4.62/include/asm-generic/bug.h 2017-04-18 17:38:08.122646453 +0200
  10544. @@ -206,6 +206,20 @@
  10545. # define WARN_ON_SMP(x) ({0;})
  10546. #endif
  10547. +#ifdef CONFIG_PREEMPT_RT_BASE
  10548. +# define BUG_ON_RT(c) BUG_ON(c)
  10549. +# define BUG_ON_NONRT(c) do { } while (0)
  10550. +# define WARN_ON_RT(condition) WARN_ON(condition)
  10551. +# define WARN_ON_NONRT(condition) do { } while (0)
  10552. +# define WARN_ON_ONCE_NONRT(condition) do { } while (0)
  10553. +#else
  10554. +# define BUG_ON_RT(c) do { } while (0)
  10555. +# define BUG_ON_NONRT(c) BUG_ON(c)
  10556. +# define WARN_ON_RT(condition) do { } while (0)
  10557. +# define WARN_ON_NONRT(condition) WARN_ON(condition)
  10558. +# define WARN_ON_ONCE_NONRT(condition) WARN_ON_ONCE(condition)
  10559. +#endif
  10560. +
  10561. #endif /* __ASSEMBLY__ */
  10562. #endif
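The new conditional assertion macros let code keep a debugging check only on the configuration where the invariant actually holds. A short sketch with a hypothetical helper whose "not preemptible" expectation only applies to !RT kernels:

#include <linux/bug.h>		/* pulls in asm-generic/bug.h */
#include <linux/preempt.h>

/* Hypothetical helper: on RT the caller is allowed to be preemptible. */
static void example_update_stats(void)
{
	WARN_ON_NONRT(preemptible());	/* warns on !RT, compiles away on RT */

	/* ... update statistics ... */
}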
  10563. diff -Nur linux-4.4.62.orig/include/asm-generic/preempt.h linux-4.4.62/include/asm-generic/preempt.h
  10564. --- linux-4.4.62.orig/include/asm-generic/preempt.h 2017-04-18 07:15:37.000000000 +0200
  10565. +++ linux-4.4.62/include/asm-generic/preempt.h 2017-04-18 17:38:08.122646453 +0200
  10566. @@ -7,10 +7,10 @@
  10567. static __always_inline int preempt_count(void)
  10568. {
  10569. - return current_thread_info()->preempt_count;
  10570. + return READ_ONCE(current_thread_info()->preempt_count);
  10571. }
  10572. -static __always_inline int *preempt_count_ptr(void)
  10573. +static __always_inline volatile int *preempt_count_ptr(void)
  10574. {
  10575. return &current_thread_info()->preempt_count;
  10576. }
  10577. diff -Nur linux-4.4.62.orig/include/linux/blkdev.h linux-4.4.62/include/linux/blkdev.h
  10578. --- linux-4.4.62.orig/include/linux/blkdev.h 2017-04-18 07:15:37.000000000 +0200
  10579. +++ linux-4.4.62/include/linux/blkdev.h 2017-04-18 17:38:08.122646453 +0200
  10580. @@ -89,6 +89,7 @@
  10581. struct list_head queuelist;
  10582. union {
  10583. struct call_single_data csd;
  10584. + struct work_struct work;
  10585. unsigned long fifo_time;
  10586. };
  10587. @@ -455,7 +456,7 @@
  10588. struct throtl_data *td;
  10589. #endif
  10590. struct rcu_head rcu_head;
  10591. - wait_queue_head_t mq_freeze_wq;
  10592. + struct swait_queue_head mq_freeze_wq;
  10593. struct percpu_ref q_usage_counter;
  10594. struct list_head all_q_node;
  10595. diff -Nur linux-4.4.62.orig/include/linux/blk-mq.h linux-4.4.62/include/linux/blk-mq.h
  10596. --- linux-4.4.62.orig/include/linux/blk-mq.h 2017-04-18 07:15:37.000000000 +0200
  10597. +++ linux-4.4.62/include/linux/blk-mq.h 2017-04-18 17:38:08.122646453 +0200
  10598. @@ -212,6 +212,7 @@
  10599. struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index);
  10600. struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int);
  10601. +void __blk_mq_complete_request_remote_work(struct work_struct *work);
  10602. int blk_mq_request_started(struct request *rq);
  10603. void blk_mq_start_request(struct request *rq);
  10604. diff -Nur linux-4.4.62.orig/include/linux/bottom_half.h linux-4.4.62/include/linux/bottom_half.h
  10605. --- linux-4.4.62.orig/include/linux/bottom_half.h 2017-04-18 07:15:37.000000000 +0200
  10606. +++ linux-4.4.62/include/linux/bottom_half.h 2017-04-18 17:38:08.122646453 +0200
  10607. @@ -3,6 +3,39 @@
  10608. #include <linux/preempt.h>
  10609. +#ifdef CONFIG_PREEMPT_RT_FULL
  10610. +
  10611. +extern void __local_bh_disable(void);
  10612. +extern void _local_bh_enable(void);
  10613. +extern void __local_bh_enable(void);
  10614. +
  10615. +static inline void local_bh_disable(void)
  10616. +{
  10617. + __local_bh_disable();
  10618. +}
  10619. +
  10620. +static inline void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
  10621. +{
  10622. + __local_bh_disable();
  10623. +}
  10624. +
  10625. +static inline void local_bh_enable(void)
  10626. +{
  10627. + __local_bh_enable();
  10628. +}
  10629. +
  10630. +static inline void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
  10631. +{
  10632. + __local_bh_enable();
  10633. +}
  10634. +
  10635. +static inline void local_bh_enable_ip(unsigned long ip)
  10636. +{
  10637. + __local_bh_enable();
  10638. +}
  10639. +
  10640. +#else
  10641. +
  10642. #ifdef CONFIG_TRACE_IRQFLAGS
  10643. extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt);
  10644. #else
  10645. @@ -30,5 +63,6 @@
  10646. {
  10647. __local_bh_enable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET);
  10648. }
  10649. +#endif
  10650. #endif /* _LINUX_BH_H */
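On PREEMPT_RT_FULL all of the bottom-half disable/enable entry points above funnel into __local_bh_disable()/__local_bh_enable(), which the softirq rework in this patch provides; callers keep using the regular API unchanged. A minimal caller sketch:

#include <linux/bottom_half.h>

/* Callers keep the usual pairing; only the implementation behind the
 * calls changes when CONFIG_PREEMPT_RT_FULL is set. */
static void example_bh_protected_update(void)
{
	local_bh_disable();
	/* ... touch data shared with softirq context ... */
	local_bh_enable();
}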
  10651. diff -Nur linux-4.4.62.orig/include/linux/buffer_head.h linux-4.4.62/include/linux/buffer_head.h
  10652. --- linux-4.4.62.orig/include/linux/buffer_head.h 2017-04-18 07:15:37.000000000 +0200
  10653. +++ linux-4.4.62/include/linux/buffer_head.h 2017-04-18 17:38:08.122646453 +0200
  10654. @@ -75,8 +75,50 @@
  10655. struct address_space *b_assoc_map; /* mapping this buffer is
  10656. associated with */
  10657. atomic_t b_count; /* users using this buffer_head */
  10658. +#ifdef CONFIG_PREEMPT_RT_BASE
  10659. + spinlock_t b_uptodate_lock;
  10660. +#if IS_ENABLED(CONFIG_JBD2)
  10661. + spinlock_t b_state_lock;
  10662. + spinlock_t b_journal_head_lock;
  10663. +#endif
  10664. +#endif
  10665. };
  10666. +static inline unsigned long bh_uptodate_lock_irqsave(struct buffer_head *bh)
  10667. +{
  10668. + unsigned long flags;
  10669. +
  10670. +#ifndef CONFIG_PREEMPT_RT_BASE
  10671. + local_irq_save(flags);
  10672. + bit_spin_lock(BH_Uptodate_Lock, &bh->b_state);
  10673. +#else
  10674. + spin_lock_irqsave(&bh->b_uptodate_lock, flags);
  10675. +#endif
  10676. + return flags;
  10677. +}
  10678. +
  10679. +static inline void
  10680. +bh_uptodate_unlock_irqrestore(struct buffer_head *bh, unsigned long flags)
  10681. +{
  10682. +#ifndef CONFIG_PREEMPT_RT_BASE
  10683. + bit_spin_unlock(BH_Uptodate_Lock, &bh->b_state);
  10684. + local_irq_restore(flags);
  10685. +#else
  10686. + spin_unlock_irqrestore(&bh->b_uptodate_lock, flags);
  10687. +#endif
  10688. +}
  10689. +
  10690. +static inline void buffer_head_init_locks(struct buffer_head *bh)
  10691. +{
  10692. +#ifdef CONFIG_PREEMPT_RT_BASE
  10693. + spin_lock_init(&bh->b_uptodate_lock);
  10694. +#if IS_ENABLED(CONFIG_JBD2)
  10695. + spin_lock_init(&bh->b_state_lock);
  10696. + spin_lock_init(&bh->b_journal_head_lock);
  10697. +#endif
  10698. +#endif
  10699. +}
  10700. +
  10701. /*
  10702. * macro tricks to expand the set_buffer_foo(), clear_buffer_foo()
  10703. * and buffer_foo() functions.
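The new helpers hide the RT/!RT split behind one call: on !RT they keep the original BH_Uptodate_Lock bit spinlock with interrupts disabled, on RT they take the sleeping b_uptodate_lock added to struct buffer_head (initialised via buffer_head_init_locks(), which the patch is assumed to hook into buffer head allocation). A sketch of an end_io-style handler modeled on the fs/ntfs/aops.c hunk above:

#include <linux/buffer_head.h>

/* Serialize the async-read state of the buffers on one page. */
static void example_end_buffer_read(struct buffer_head *bh)
{
	struct buffer_head *first = page_buffers(bh->b_page);
	unsigned long flags;

	flags = bh_uptodate_lock_irqsave(first);
	clear_buffer_async_read(bh);
	unlock_buffer(bh);
	bh_uptodate_unlock_irqrestore(first, flags);
}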
  10704. diff -Nur linux-4.4.62.orig/include/linux/cgroup-defs.h linux-4.4.62/include/linux/cgroup-defs.h
  10705. --- linux-4.4.62.orig/include/linux/cgroup-defs.h 2017-04-18 07:15:37.000000000 +0200
  10706. +++ linux-4.4.62/include/linux/cgroup-defs.h 2017-04-18 17:38:08.122646453 +0200
  10707. @@ -16,6 +16,7 @@
  10708. #include <linux/percpu-refcount.h>
  10709. #include <linux/percpu-rwsem.h>
  10710. #include <linux/workqueue.h>
  10711. +#include <linux/swork.h>
  10712. #ifdef CONFIG_CGROUPS
  10713. @@ -142,6 +143,7 @@
  10714. /* percpu_ref killing and RCU release */
  10715. struct rcu_head rcu_head;
  10716. struct work_struct destroy_work;
  10717. + struct swork_event destroy_swork;
  10718. };
  10719. /*
  10720. diff -Nur linux-4.4.62.orig/include/linux/clk/at91_pmc.h linux-4.4.62/include/linux/clk/at91_pmc.h
  10721. --- linux-4.4.62.orig/include/linux/clk/at91_pmc.h 2017-04-18 07:15:37.000000000 +0200
  10722. +++ linux-4.4.62/include/linux/clk/at91_pmc.h 2017-04-18 17:38:08.122646453 +0200
  10723. @@ -16,18 +16,6 @@
  10724. #ifndef AT91_PMC_H
  10725. #define AT91_PMC_H
  10726. -#ifndef __ASSEMBLY__
  10727. -extern void __iomem *at91_pmc_base;
  10728. -
  10729. -#define at91_pmc_read(field) \
  10730. - readl_relaxed(at91_pmc_base + field)
  10731. -
  10732. -#define at91_pmc_write(field, value) \
  10733. - writel_relaxed(value, at91_pmc_base + field)
  10734. -#else
  10735. -.extern at91_pmc_base
  10736. -#endif
  10737. -
  10738. #define AT91_PMC_SCER 0x00 /* System Clock Enable Register */
  10739. #define AT91_PMC_SCDR 0x04 /* System Clock Disable Register */
  10740. diff -Nur linux-4.4.62.orig/include/linux/completion.h linux-4.4.62/include/linux/completion.h
  10741. --- linux-4.4.62.orig/include/linux/completion.h 2017-04-18 07:15:37.000000000 +0200
  10742. +++ linux-4.4.62/include/linux/completion.h 2017-04-18 17:38:08.122646453 +0200
  10743. @@ -7,8 +7,7 @@
  10744. * Atomic wait-for-completion handler data structures.
  10745. * See kernel/sched/completion.c for details.
  10746. */
  10747. -
  10748. -#include <linux/wait.h>
  10749. +#include <linux/swait.h>
  10750. /*
  10751. * struct completion - structure used to maintain state for a "completion"
  10752. @@ -24,11 +23,11 @@
  10753. */
  10754. struct completion {
  10755. unsigned int done;
  10756. - wait_queue_head_t wait;
  10757. + struct swait_queue_head wait;
  10758. };
  10759. #define COMPLETION_INITIALIZER(work) \
  10760. - { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
  10761. + { 0, __SWAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
  10762. #define COMPLETION_INITIALIZER_ONSTACK(work) \
  10763. ({ init_completion(&work); work; })
  10764. @@ -73,7 +72,7 @@
  10765. static inline void init_completion(struct completion *x)
  10766. {
  10767. x->done = 0;
  10768. - init_waitqueue_head(&x->wait);
  10769. + init_swait_queue_head(&x->wait);
  10770. }
  10771. /**
  10772. diff -Nur linux-4.4.62.orig/include/linux/cpu.h linux-4.4.62/include/linux/cpu.h
  10773. --- linux-4.4.62.orig/include/linux/cpu.h 2017-04-18 07:15:37.000000000 +0200
  10774. +++ linux-4.4.62/include/linux/cpu.h 2017-04-18 17:38:08.122646453 +0200
  10775. @@ -224,6 +224,8 @@
  10776. extern void put_online_cpus(void);
  10777. extern void cpu_hotplug_disable(void);
  10778. extern void cpu_hotplug_enable(void);
  10779. +extern void pin_current_cpu(void);
  10780. +extern void unpin_current_cpu(void);
  10781. #define hotcpu_notifier(fn, pri) cpu_notifier(fn, pri)
  10782. #define __hotcpu_notifier(fn, pri) __cpu_notifier(fn, pri)
  10783. #define register_hotcpu_notifier(nb) register_cpu_notifier(nb)
  10784. @@ -241,6 +243,8 @@
  10785. #define put_online_cpus() do { } while (0)
  10786. #define cpu_hotplug_disable() do { } while (0)
  10787. #define cpu_hotplug_enable() do { } while (0)
  10788. +static inline void pin_current_cpu(void) { }
  10789. +static inline void unpin_current_cpu(void) { }
  10790. #define hotcpu_notifier(fn, pri) do { (void)(fn); } while (0)
  10791. #define __hotcpu_notifier(fn, pri) do { (void)(fn); } while (0)
  10792. /* These aren't inline functions due to a GCC bug. */
  10793. diff -Nur linux-4.4.62.orig/include/linux/delay.h linux-4.4.62/include/linux/delay.h
  10794. --- linux-4.4.62.orig/include/linux/delay.h 2017-04-18 07:15:37.000000000 +0200
  10795. +++ linux-4.4.62/include/linux/delay.h 2017-04-18 17:38:08.122646453 +0200
  10796. @@ -52,4 +52,10 @@
  10797. msleep(seconds * 1000);
  10798. }
  10799. +#ifdef CONFIG_PREEMPT_RT_FULL
  10800. +extern void cpu_chill(void);
  10801. +#else
  10802. +# define cpu_chill() cpu_relax()
  10803. +#endif
  10804. +
  10805. #endif /* defined(_LINUX_DELAY_H) */
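cpu_chill() is the RT-safe replacement for cpu_relax() in loops that retry until another, possibly lower-priority, task makes progress: on !RT it is just cpu_relax(), on RT it yields briefly so the other task can run. A hedged sketch of the retry pattern used by the fs/namespace.c hunk above; the flag word is hypothetical:

#include <linux/bitops.h>
#include <linux/delay.h>

/* Wait until another task clears a bit in a shared word. */
static void example_wait_for_bit_clear(unsigned long *word, int bit)
{
	while (test_bit(bit, word))
		cpu_chill();	/* cpu_relax() on !RT, a short sleep on RT */
}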
  10806. diff -Nur linux-4.4.62.orig/include/linux/ftrace.h linux-4.4.62/include/linux/ftrace.h
  10807. --- linux-4.4.62.orig/include/linux/ftrace.h 2017-04-18 07:15:37.000000000 +0200
  10808. +++ linux-4.4.62/include/linux/ftrace.h 2017-04-18 17:38:08.122646453 +0200
  10809. @@ -694,6 +694,18 @@
  10810. #define CALLER_ADDR5 ((unsigned long)ftrace_return_address(5))
  10811. #define CALLER_ADDR6 ((unsigned long)ftrace_return_address(6))
  10812. +static inline unsigned long get_lock_parent_ip(void)
  10813. +{
  10814. + unsigned long addr = CALLER_ADDR0;
  10815. +
  10816. + if (!in_lock_functions(addr))
  10817. + return addr;
  10818. + addr = CALLER_ADDR1;
  10819. + if (!in_lock_functions(addr))
  10820. + return addr;
  10821. + return CALLER_ADDR2;
  10822. +}
  10823. +
  10824. #ifdef CONFIG_IRQSOFF_TRACER
  10825. extern void time_hardirqs_on(unsigned long a0, unsigned long a1);
  10826. extern void time_hardirqs_off(unsigned long a0, unsigned long a1);
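get_lock_parent_ip() walks up to three return addresses to find the first caller that is not itself a locking function, which is what the latency tracers want to attribute preempt/irq-off sections to. A small sketch of a consumer; the pr_debug() reporting is illustrative only:

#include <linux/ftrace.h>
#include <linux/printk.h>

/* Print the first return address that is not inside a lock_* function. */
static void example_report_caller(void)
{
	pr_debug("entered from %pS\n", (void *)get_lock_parent_ip());
}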
  10827. diff -Nur linux-4.4.62.orig/include/linux/highmem.h linux-4.4.62/include/linux/highmem.h
  10828. --- linux-4.4.62.orig/include/linux/highmem.h 2017-04-18 07:15:37.000000000 +0200
  10829. +++ linux-4.4.62/include/linux/highmem.h 2017-04-18 17:38:08.122646453 +0200
  10830. @@ -7,6 +7,7 @@
  10831. #include <linux/mm.h>
  10832. #include <linux/uaccess.h>
  10833. #include <linux/hardirq.h>
  10834. +#include <linux/sched.h>
  10835. #include <asm/cacheflush.h>
  10836. @@ -65,7 +66,7 @@
  10837. static inline void *kmap_atomic(struct page *page)
  10838. {
  10839. - preempt_disable();
  10840. + preempt_disable_nort();
  10841. pagefault_disable();
  10842. return page_address(page);
  10843. }
  10844. @@ -74,7 +75,7 @@
  10845. static inline void __kunmap_atomic(void *addr)
  10846. {
  10847. pagefault_enable();
  10848. - preempt_enable();
  10849. + preempt_enable_nort();
  10850. }
  10851. #define kmap_atomic_pfn(pfn) kmap_atomic(pfn_to_page(pfn))
  10852. @@ -86,32 +87,51 @@
  10853. #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32)
  10854. +#ifndef CONFIG_PREEMPT_RT_FULL
  10855. DECLARE_PER_CPU(int, __kmap_atomic_idx);
  10856. +#endif
  10857. static inline int kmap_atomic_idx_push(void)
  10858. {
  10859. +#ifndef CONFIG_PREEMPT_RT_FULL
  10860. int idx = __this_cpu_inc_return(__kmap_atomic_idx) - 1;
  10861. -#ifdef CONFIG_DEBUG_HIGHMEM
  10862. +# ifdef CONFIG_DEBUG_HIGHMEM
  10863. WARN_ON_ONCE(in_irq() && !irqs_disabled());
  10864. BUG_ON(idx >= KM_TYPE_NR);
  10865. -#endif
  10866. +# endif
  10867. return idx;
  10868. +#else
  10869. + current->kmap_idx++;
  10870. + BUG_ON(current->kmap_idx > KM_TYPE_NR);
  10871. + return current->kmap_idx - 1;
  10872. +#endif
  10873. }
  10874. static inline int kmap_atomic_idx(void)
  10875. {
  10876. +#ifndef CONFIG_PREEMPT_RT_FULL
  10877. return __this_cpu_read(__kmap_atomic_idx) - 1;
  10878. +#else
  10879. + return current->kmap_idx - 1;
  10880. +#endif
  10881. }
  10882. static inline void kmap_atomic_idx_pop(void)
  10883. {
  10884. -#ifdef CONFIG_DEBUG_HIGHMEM
  10885. +#ifndef CONFIG_PREEMPT_RT_FULL
  10886. +# ifdef CONFIG_DEBUG_HIGHMEM
  10887. int idx = __this_cpu_dec_return(__kmap_atomic_idx);
  10888. BUG_ON(idx < 0);
  10889. -#else
  10890. +# else
  10891. __this_cpu_dec(__kmap_atomic_idx);
  10892. +# endif
  10893. +#else
  10894. + current->kmap_idx--;
  10895. +# ifdef CONFIG_DEBUG_HIGHMEM
  10896. + BUG_ON(current->kmap_idx < 0);
  10897. +# endif
  10898. #endif
  10899. }
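For callers nothing changes: kmap_atomic()/kunmap_atomic() keep their strict nesting; the hunk only moves the nesting index from the per-CPU __kmap_atomic_idx to current->kmap_idx on RT. A minimal caller sketch:

#include <linux/highmem.h>
#include <linux/string.h>

/* Copy the first bytes out of a (possibly highmem) page. */
static void example_peek_page(struct page *page, void *buf, size_t len)
{
	void *kaddr = kmap_atomic(page);

	memcpy(buf, kaddr, len);
	kunmap_atomic(kaddr);
}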
  10900. diff -Nur linux-4.4.62.orig/include/linux/hrtimer.h linux-4.4.62/include/linux/hrtimer.h
  10901. --- linux-4.4.62.orig/include/linux/hrtimer.h 2017-04-18 07:15:37.000000000 +0200
  10902. +++ linux-4.4.62/include/linux/hrtimer.h 2017-04-18 17:38:08.122646453 +0200
  10903. @@ -87,6 +87,9 @@
  10904. * @function: timer expiry callback function
  10905. * @base: pointer to the timer base (per cpu and per clock)
  10906. * @state: state information (See bit values above)
  10907. + * @cb_entry: list entry to defer timers from hardirq context
  10908. + * @irqsafe: timer can run in hardirq context
  10909. + * @praecox: timer expiry time if expired at the time of programming
  10910. * @is_rel: Set if the timer was armed relative
  10911. * @start_pid: timer statistics field to store the pid of the task which
  10912. * started the timer
  10913. @@ -103,6 +106,11 @@
  10914. enum hrtimer_restart (*function)(struct hrtimer *);
  10915. struct hrtimer_clock_base *base;
  10916. u8 state;
  10917. + struct list_head cb_entry;
  10918. + int irqsafe;
  10919. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  10920. + ktime_t praecox;
  10921. +#endif
  10922. u8 is_rel;
  10923. #ifdef CONFIG_TIMER_STATS
  10924. int start_pid;
  10925. @@ -123,11 +131,7 @@
  10926. struct task_struct *task;
  10927. };
  10928. -#ifdef CONFIG_64BIT
  10929. # define HRTIMER_CLOCK_BASE_ALIGN 64
  10930. -#else
  10931. -# define HRTIMER_CLOCK_BASE_ALIGN 32
  10932. -#endif
  10933. /**
  10934. * struct hrtimer_clock_base - the timer base for a specific clock
  10935. @@ -136,6 +140,7 @@
  10936. * timer to a base on another cpu.
  10937. * @clockid: clock id for per_cpu support
  10938. * @active: red black tree root node for the active timers
  10939. + * @expired: list head for deferred timers.
  10940. * @get_time: function to retrieve the current time of the clock
  10941. * @offset: offset of this clock to the monotonic base
  10942. */
  10943. @@ -144,6 +149,7 @@
  10944. int index;
  10945. clockid_t clockid;
  10946. struct timerqueue_head active;
  10947. + struct list_head expired;
  10948. ktime_t (*get_time)(void);
  10949. ktime_t offset;
  10950. } __attribute__((__aligned__(HRTIMER_CLOCK_BASE_ALIGN)));
  10951. @@ -187,6 +193,7 @@
  10952. raw_spinlock_t lock;
  10953. seqcount_t seq;
  10954. struct hrtimer *running;
  10955. + struct hrtimer *running_soft;
  10956. unsigned int cpu;
  10957. unsigned int active_bases;
  10958. unsigned int clock_was_set_seq;
  10959. @@ -203,6 +210,9 @@
  10960. unsigned int nr_hangs;
  10961. unsigned int max_hang_time;
  10962. #endif
  10963. +#ifdef CONFIG_PREEMPT_RT_BASE
  10964. + wait_queue_head_t wait;
  10965. +#endif
  10966. struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES];
  10967. } ____cacheline_aligned;
  10968. @@ -412,6 +422,13 @@
  10969. hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
  10970. }
  10971. +/* Softirq preemption could deadlock timer removal */
  10972. +#ifdef CONFIG_PREEMPT_RT_BASE
  10973. + extern void hrtimer_wait_for_timer(const struct hrtimer *timer);
  10974. +#else
  10975. +# define hrtimer_wait_for_timer(timer) do { cpu_relax(); } while (0)
  10976. +#endif
  10977. +
  10978. /* Query timers: */
  10979. extern ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust);
  10980. @@ -436,7 +453,7 @@
  10981. * Helper function to check, whether the timer is running the callback
  10982. * function
  10983. */
  10984. -static inline int hrtimer_callback_running(struct hrtimer *timer)
  10985. +static inline int hrtimer_callback_running(const struct hrtimer *timer)
  10986. {
  10987. return timer->base->cpu_base->running == timer;
  10988. }
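Because timer callbacks can run in softirq (thread) context on RT, looping on hrtimer_try_to_cancel() with only cpu_relax() can starve the very thread being waited for. hrtimer_wait_for_timer() blocks on the cpu_base's wait queue until the callback finishes; on !RT it degenerates to cpu_relax(). A sketch of the cancel-and-wait pattern used by the fs/timerfd.c hunk above:

#include <linux/hrtimer.h>

/* Cancel a timer whose callback may currently be running. */
static void example_cancel_sync(struct hrtimer *timer)
{
	while (hrtimer_try_to_cancel(timer) < 0)
		hrtimer_wait_for_timer(timer);	/* blocks on RT instead of spinning */
}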
  10989. diff -Nur linux-4.4.62.orig/include/linux/idr.h linux-4.4.62/include/linux/idr.h
  10990. --- linux-4.4.62.orig/include/linux/idr.h 2017-04-18 07:15:37.000000000 +0200
  10991. +++ linux-4.4.62/include/linux/idr.h 2017-04-18 17:38:08.122646453 +0200
  10992. @@ -95,10 +95,14 @@
  10993. * Each idr_preload() should be matched with an invocation of this
  10994. * function. See idr_preload() for details.
  10995. */
  10996. +#ifdef CONFIG_PREEMPT_RT_FULL
  10997. +void idr_preload_end(void);
  10998. +#else
  10999. static inline void idr_preload_end(void)
  11000. {
  11001. preempt_enable();
  11002. }
  11003. +#endif
  11004. /**
  11005. * idr_find - return pointer for given id
  11006. diff -Nur linux-4.4.62.orig/include/linux/init_task.h linux-4.4.62/include/linux/init_task.h
  11007. --- linux-4.4.62.orig/include/linux/init_task.h 2017-04-18 07:15:37.000000000 +0200
  11008. +++ linux-4.4.62/include/linux/init_task.h 2017-04-18 17:38:08.122646453 +0200
  11009. @@ -148,9 +148,15 @@
  11010. # define INIT_PERF_EVENTS(tsk)
  11011. #endif
  11012. +#ifdef CONFIG_PREEMPT_RT_BASE
  11013. +# define INIT_TIMER_LIST .posix_timer_list = NULL,
  11014. +#else
  11015. +# define INIT_TIMER_LIST
  11016. +#endif
  11017. +
  11018. #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
  11019. # define INIT_VTIME(tsk) \
  11020. - .vtime_seqlock = __SEQLOCK_UNLOCKED(tsk.vtime_seqlock), \
  11021. + .vtime_seqcount = SEQCNT_ZERO(tsk.vtime_seqcount), \
  11022. .vtime_snap = 0, \
  11023. .vtime_snap_whence = VTIME_SYS,
  11024. #else
  11025. @@ -239,6 +245,7 @@
  11026. .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \
  11027. .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \
  11028. .timer_slack_ns = 50000, /* 50 usec default slack */ \
  11029. + INIT_TIMER_LIST \
  11030. .pids = { \
  11031. [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \
  11032. [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \
  11033. diff -Nur linux-4.4.62.orig/include/linux/interrupt.h linux-4.4.62/include/linux/interrupt.h
  11034. --- linux-4.4.62.orig/include/linux/interrupt.h 2017-04-18 07:15:37.000000000 +0200
  11035. +++ linux-4.4.62/include/linux/interrupt.h 2017-04-18 17:38:08.126646608 +0200
  11036. @@ -61,6 +61,7 @@
  11037. * interrupt handler after suspending interrupts. For system
  11038. * wakeup devices users need to implement wakeup detection in
  11039. * their interrupt handlers.
  11040. + * IRQF_NO_SOFTIRQ_CALL - Do not process softirqs in the irq thread context (RT)
  11041. */
  11042. #define IRQF_SHARED 0x00000080
  11043. #define IRQF_PROBE_SHARED 0x00000100
  11044. @@ -74,6 +75,7 @@
  11045. #define IRQF_NO_THREAD 0x00010000
  11046. #define IRQF_EARLY_RESUME 0x00020000
  11047. #define IRQF_COND_SUSPEND 0x00040000
  11048. +#define IRQF_NO_SOFTIRQ_CALL 0x00080000
  11049. #define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD)
  11050. @@ -186,7 +188,7 @@
  11051. #ifdef CONFIG_LOCKDEP
  11052. # define local_irq_enable_in_hardirq() do { } while (0)
  11053. #else
  11054. -# define local_irq_enable_in_hardirq() local_irq_enable()
  11055. +# define local_irq_enable_in_hardirq() local_irq_enable_nort()
  11056. #endif
  11057. extern void disable_irq_nosync(unsigned int irq);
  11058. @@ -206,6 +208,7 @@
  11059. * @irq: Interrupt to which notification applies
  11060. * @kref: Reference count, for internal use
  11061. * @work: Work item, for internal use
  11062. + * @list: List item for deferred callbacks
  11063. * @notify: Function to be called on change. This will be
  11064. * called in process context.
  11065. * @release: Function to be called on release. This will be
  11066. @@ -217,6 +220,7 @@
  11067. unsigned int irq;
  11068. struct kref kref;
  11069. struct work_struct work;
  11070. + struct list_head list;
  11071. void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask);
  11072. void (*release)(struct kref *ref);
  11073. };
  11074. @@ -379,9 +383,13 @@
  11075. bool state);
  11076. #ifdef CONFIG_IRQ_FORCED_THREADING
  11077. +# ifndef CONFIG_PREEMPT_RT_BASE
  11078. extern bool force_irqthreads;
  11079. +# else
  11080. +# define force_irqthreads (true)
  11081. +# endif
  11082. #else
  11083. -#define force_irqthreads (0)
  11084. +#define force_irqthreads (false)
  11085. #endif
  11086. #ifndef __ARCH_SET_SOFTIRQ_PENDING
  11087. @@ -438,9 +446,10 @@
  11088. void (*action)(struct softirq_action *);
  11089. };
  11090. +#ifndef CONFIG_PREEMPT_RT_FULL
  11091. asmlinkage void do_softirq(void);
  11092. asmlinkage void __do_softirq(void);
  11093. -
  11094. +static inline void thread_do_softirq(void) { do_softirq(); }
  11095. #ifdef __ARCH_HAS_DO_SOFTIRQ
  11096. void do_softirq_own_stack(void);
  11097. #else
  11098. @@ -449,13 +458,25 @@
  11099. __do_softirq();
  11100. }
  11101. #endif
  11102. +#else
  11103. +extern void thread_do_softirq(void);
  11104. +#endif
  11105. extern void open_softirq(int nr, void (*action)(struct softirq_action *));
  11106. extern void softirq_init(void);
  11107. extern void __raise_softirq_irqoff(unsigned int nr);
  11108. +#ifdef CONFIG_PREEMPT_RT_FULL
  11109. +extern void __raise_softirq_irqoff_ksoft(unsigned int nr);
  11110. +#else
  11111. +static inline void __raise_softirq_irqoff_ksoft(unsigned int nr)
  11112. +{
  11113. + __raise_softirq_irqoff(nr);
  11114. +}
  11115. +#endif
  11116. extern void raise_softirq_irqoff(unsigned int nr);
  11117. extern void raise_softirq(unsigned int nr);
  11118. +extern void softirq_check_pending_idle(void);
  11119. DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
  11120. @@ -477,8 +498,9 @@
  11121. to be executed on some cpu at least once after this.
  11122. * If the tasklet is already scheduled, but its execution is still not
  11123. started, it will be executed only once.
  11124. - * If this tasklet is already running on another CPU (or schedule is called
  11125. - from tasklet itself), it is rescheduled for later.
  11126. + * If this tasklet is already running on another CPU, it is rescheduled
  11127. + for later.
  11128. + * Schedule must not be called from the tasklet itself (a lockup occurs)
  11129. * Tasklet is strictly serialized wrt itself, but not
  11130. wrt another tasklets. If client needs some intertask synchronization,
  11131. he makes it with spinlocks.
  11132. @@ -503,27 +525,36 @@
  11133. enum
  11134. {
  11135. TASKLET_STATE_SCHED, /* Tasklet is scheduled for execution */
  11136. - TASKLET_STATE_RUN /* Tasklet is running (SMP only) */
  11137. + TASKLET_STATE_RUN, /* Tasklet is running (SMP only) */
  11138. + TASKLET_STATE_PENDING /* Tasklet is pending */
  11139. };
  11140. -#ifdef CONFIG_SMP
  11141. +#define TASKLET_STATEF_SCHED (1 << TASKLET_STATE_SCHED)
  11142. +#define TASKLET_STATEF_RUN (1 << TASKLET_STATE_RUN)
  11143. +#define TASKLET_STATEF_PENDING (1 << TASKLET_STATE_PENDING)
  11144. +
  11145. +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
  11146. static inline int tasklet_trylock(struct tasklet_struct *t)
  11147. {
  11148. return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state);
  11149. }
  11150. +static inline int tasklet_tryunlock(struct tasklet_struct *t)
  11151. +{
  11152. + return cmpxchg(&t->state, TASKLET_STATEF_RUN, 0) == TASKLET_STATEF_RUN;
  11153. +}
  11154. +
  11155. static inline void tasklet_unlock(struct tasklet_struct *t)
  11156. {
  11157. smp_mb__before_atomic();
  11158. clear_bit(TASKLET_STATE_RUN, &(t)->state);
  11159. }
  11160. -static inline void tasklet_unlock_wait(struct tasklet_struct *t)
  11161. -{
  11162. - while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); }
  11163. -}
  11164. +extern void tasklet_unlock_wait(struct tasklet_struct *t);
  11165. +
  11166. #else
  11167. #define tasklet_trylock(t) 1
  11168. +#define tasklet_tryunlock(t) 1
  11169. #define tasklet_unlock_wait(t) do { } while (0)
  11170. #define tasklet_unlock(t) do { } while (0)
  11171. #endif
  11172. @@ -572,12 +603,7 @@
  11173. smp_mb();
  11174. }
  11175. -static inline void tasklet_enable(struct tasklet_struct *t)
  11176. -{
  11177. - smp_mb__before_atomic();
  11178. - atomic_dec(&t->count);
  11179. -}
  11180. -
  11181. +extern void tasklet_enable(struct tasklet_struct *t);
  11182. extern void tasklet_kill(struct tasklet_struct *t);
  11183. extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu);
  11184. extern void tasklet_init(struct tasklet_struct *t,
  11185. @@ -608,6 +634,12 @@
  11186. tasklet_kill(&ttimer->tasklet);
  11187. }
  11188. +#ifdef CONFIG_PREEMPT_RT_FULL
  11189. +extern void softirq_early_init(void);
  11190. +#else
  11191. +static inline void softirq_early_init(void) { }
  11192. +#endif
  11193. +
  11194. /*
  11195. * Autoprobing for irqs:
  11196. *
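IRQF_NO_SOFTIRQ_CALL asks the forced-threaded IRQ path not to process softirqs from the IRQ thread context on RT, matching the flag comment added above. A hedged sketch of a driver passing it; the device name and handler are hypothetical:

#include <linux/interrupt.h>

static irqreturn_t example_irq_handler(int irq, void *dev_id)
{
	/* ... acknowledge the hypothetical device ... */
	return IRQ_HANDLED;
}

static int example_setup_irq(unsigned int irq, void *dev)
{
	return request_irq(irq, example_irq_handler,
			   IRQF_NO_SOFTIRQ_CALL, "example-dev", dev);
}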
  11197. diff -Nur linux-4.4.62.orig/include/linux/irqdesc.h linux-4.4.62/include/linux/irqdesc.h
  11198. --- linux-4.4.62.orig/include/linux/irqdesc.h 2017-04-18 07:15:37.000000000 +0200
  11199. +++ linux-4.4.62/include/linux/irqdesc.h 2017-04-18 17:38:08.126646608 +0200
  11200. @@ -61,6 +61,7 @@
  11201. unsigned int irqs_unhandled;
  11202. atomic_t threads_handled;
  11203. int threads_handled_last;
  11204. + u64 random_ip;
  11205. raw_spinlock_t lock;
  11206. struct cpumask *percpu_enabled;
  11207. #ifdef CONFIG_SMP
  11208. diff -Nur linux-4.4.62.orig/include/linux/irqflags.h linux-4.4.62/include/linux/irqflags.h
  11209. --- linux-4.4.62.orig/include/linux/irqflags.h 2017-04-18 07:15:37.000000000 +0200
  11210. +++ linux-4.4.62/include/linux/irqflags.h 2017-04-18 17:38:08.126646608 +0200
  11211. @@ -25,8 +25,6 @@
  11212. # define trace_softirqs_enabled(p) ((p)->softirqs_enabled)
  11213. # define trace_hardirq_enter() do { current->hardirq_context++; } while (0)
  11214. # define trace_hardirq_exit() do { current->hardirq_context--; } while (0)
  11215. -# define lockdep_softirq_enter() do { current->softirq_context++; } while (0)
  11216. -# define lockdep_softirq_exit() do { current->softirq_context--; } while (0)
  11217. # define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1,
  11218. #else
  11219. # define trace_hardirqs_on() do { } while (0)
  11220. @@ -39,9 +37,15 @@
  11221. # define trace_softirqs_enabled(p) 0
  11222. # define trace_hardirq_enter() do { } while (0)
  11223. # define trace_hardirq_exit() do { } while (0)
  11224. +# define INIT_TRACE_IRQFLAGS
  11225. +#endif
  11226. +
  11227. +#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PREEMPT_RT_FULL)
  11228. +# define lockdep_softirq_enter() do { current->softirq_context++; } while (0)
  11229. +# define lockdep_softirq_exit() do { current->softirq_context--; } while (0)
  11230. +#else
  11231. # define lockdep_softirq_enter() do { } while (0)
  11232. # define lockdep_softirq_exit() do { } while (0)
  11233. -# define INIT_TRACE_IRQFLAGS
  11234. #endif
  11235. #if defined(CONFIG_IRQSOFF_TRACER) || \
  11236. @@ -148,4 +152,23 @@
  11237. #define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags)
  11238. +/*
  11239. + * local_irq* variants depending on RT/!RT
  11240. + */
  11241. +#ifdef CONFIG_PREEMPT_RT_FULL
  11242. +# define local_irq_disable_nort() do { } while (0)
  11243. +# define local_irq_enable_nort() do { } while (0)
  11244. +# define local_irq_save_nort(flags) local_save_flags(flags)
  11245. +# define local_irq_restore_nort(flags) (void)(flags)
  11246. +# define local_irq_disable_rt() local_irq_disable()
  11247. +# define local_irq_enable_rt() local_irq_enable()
  11248. +#else
  11249. +# define local_irq_disable_nort() local_irq_disable()
  11250. +# define local_irq_enable_nort() local_irq_enable()
  11251. +# define local_irq_save_nort(flags) local_irq_save(flags)
  11252. +# define local_irq_restore_nort(flags) local_irq_restore(flags)
  11253. +# define local_irq_disable_rt() do { } while (0)
  11254. +# define local_irq_enable_rt() do { } while (0)
  11255. +#endif
  11256. +
  11257. #endif
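The _nort variants keep interrupts disabled on !RT but reduce to plain flag handling on RT, for sections that only need protection against a handler which genuinely runs in hard IRQ context on !RT. A small sketch mirroring the fs/ntfs/aops.c hunk earlier in the patch; the protected work is illustrative:

#include <linux/irqflags.h>

/* Brief section that must not race the hardirq completion path on !RT;
 * on RT that path runs in thread context, so only the flags are saved. */
static void example_short_critical_section(void)
{
	unsigned long flags;

	local_irq_save_nort(flags);
	/* ... short, non-sleeping work ... */
	local_irq_restore_nort(flags);
}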
  11258. diff -Nur linux-4.4.62.orig/include/linux/irq.h linux-4.4.62/include/linux/irq.h
  11259. --- linux-4.4.62.orig/include/linux/irq.h 2017-04-18 07:15:37.000000000 +0200
  11260. +++ linux-4.4.62/include/linux/irq.h 2017-04-18 17:38:08.126646608 +0200
  11261. @@ -72,6 +72,7 @@
  11262. * IRQ_IS_POLLED - Always polled by another interrupt. Exclude
  11263. * it from the spurious interrupt detection
  11264. * mechanism and from core side polling.
  11265. + * IRQ_NO_SOFTIRQ_CALL - No softirq processing in the irq thread context (RT)
  11266. * IRQ_DISABLE_UNLAZY - Disable lazy irq disable
  11267. */
  11268. enum {
  11269. @@ -99,13 +100,14 @@
  11270. IRQ_PER_CPU_DEVID = (1 << 17),
  11271. IRQ_IS_POLLED = (1 << 18),
  11272. IRQ_DISABLE_UNLAZY = (1 << 19),
  11273. + IRQ_NO_SOFTIRQ_CALL = (1 << 20),
  11274. };
  11275. #define IRQF_MODIFY_MASK \
  11276. (IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \
  11277. IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \
  11278. IRQ_PER_CPU | IRQ_NESTED_THREAD | IRQ_NOTHREAD | IRQ_PER_CPU_DEVID | \
  11279. - IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY)
  11280. + IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY | IRQ_NO_SOFTIRQ_CALL)
  11281. #define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING)
  11282. diff -Nur linux-4.4.62.orig/include/linux/irq_work.h linux-4.4.62/include/linux/irq_work.h
  11283. --- linux-4.4.62.orig/include/linux/irq_work.h 2017-04-18 07:15:37.000000000 +0200
  11284. +++ linux-4.4.62/include/linux/irq_work.h 2017-04-18 17:38:08.126646608 +0200
  11285. @@ -16,6 +16,7 @@
  11286. #define IRQ_WORK_BUSY 2UL
  11287. #define IRQ_WORK_FLAGS 3UL
  11288. #define IRQ_WORK_LAZY 4UL /* Doesn't want IPI, wait for tick */
  11289. +#define IRQ_WORK_HARD_IRQ 8UL /* Run hard IRQ context, even on RT */
  11290. struct irq_work {
  11291. unsigned long flags;
  11292. @@ -51,4 +52,10 @@
  11293. static inline void irq_work_run(void) { }
  11294. #endif
  11295. +#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL)
  11296. +void irq_work_tick_soft(void);
  11297. +#else
  11298. +static inline void irq_work_tick_soft(void) { }
  11299. +#endif
  11300. +
  11301. #endif /* _LINUX_IRQ_WORK_H */
  11302. diff -Nur linux-4.4.62.orig/include/linux/jbd2.h linux-4.4.62/include/linux/jbd2.h
  11303. --- linux-4.4.62.orig/include/linux/jbd2.h 2017-04-18 07:15:37.000000000 +0200
  11304. +++ linux-4.4.62/include/linux/jbd2.h 2017-04-18 17:38:08.126646608 +0200
  11305. @@ -352,32 +352,56 @@
  11306. static inline void jbd_lock_bh_state(struct buffer_head *bh)
  11307. {
  11308. +#ifndef CONFIG_PREEMPT_RT_BASE
  11309. bit_spin_lock(BH_State, &bh->b_state);
  11310. +#else
  11311. + spin_lock(&bh->b_state_lock);
  11312. +#endif
  11313. }
  11314. static inline int jbd_trylock_bh_state(struct buffer_head *bh)
  11315. {
  11316. +#ifndef CONFIG_PREEMPT_RT_BASE
  11317. return bit_spin_trylock(BH_State, &bh->b_state);
  11318. +#else
  11319. + return spin_trylock(&bh->b_state_lock);
  11320. +#endif
  11321. }
  11322. static inline int jbd_is_locked_bh_state(struct buffer_head *bh)
  11323. {
  11324. +#ifndef CONFIG_PREEMPT_RT_BASE
  11325. return bit_spin_is_locked(BH_State, &bh->b_state);
  11326. +#else
  11327. + return spin_is_locked(&bh->b_state_lock);
  11328. +#endif
  11329. }
  11330. static inline void jbd_unlock_bh_state(struct buffer_head *bh)
  11331. {
  11332. +#ifndef CONFIG_PREEMPT_RT_BASE
  11333. bit_spin_unlock(BH_State, &bh->b_state);
  11334. +#else
  11335. + spin_unlock(&bh->b_state_lock);
  11336. +#endif
  11337. }
  11338. static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
  11339. {
  11340. +#ifndef CONFIG_PREEMPT_RT_BASE
  11341. bit_spin_lock(BH_JournalHead, &bh->b_state);
  11342. +#else
  11343. + spin_lock(&bh->b_journal_head_lock);
  11344. +#endif
  11345. }
  11346. static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
  11347. {
  11348. +#ifndef CONFIG_PREEMPT_RT_BASE
  11349. bit_spin_unlock(BH_JournalHead, &bh->b_state);
  11350. +#else
  11351. + spin_unlock(&bh->b_journal_head_lock);
  11352. +#endif
  11353. }
  11354. #define J_ASSERT(assert) BUG_ON(!(assert))
  11355. diff -Nur linux-4.4.62.orig/include/linux/kdb.h linux-4.4.62/include/linux/kdb.h
  11356. --- linux-4.4.62.orig/include/linux/kdb.h 2017-04-18 07:15:37.000000000 +0200
  11357. +++ linux-4.4.62/include/linux/kdb.h 2017-04-18 17:38:08.126646608 +0200
  11358. @@ -167,6 +167,7 @@
  11359. extern __printf(1, 2) int kdb_printf(const char *, ...);
  11360. typedef __printf(1, 2) int (*kdb_printf_t)(const char *, ...);
  11361. +#define in_kdb_printk() (kdb_trap_printk)
  11362. extern void kdb_init(int level);
  11363. /* Access to kdb specific polling devices */
  11364. @@ -201,6 +202,7 @@
  11365. extern int kdb_unregister(char *);
  11366. #else /* ! CONFIG_KGDB_KDB */
  11367. static inline __printf(1, 2) int kdb_printf(const char *fmt, ...) { return 0; }
  11368. +#define in_kdb_printk() (0)
  11369. static inline void kdb_init(int level) {}
  11370. static inline int kdb_register(char *cmd, kdb_func_t func, char *usage,
  11371. char *help, short minlen) { return 0; }
  11372. diff -Nur linux-4.4.62.orig/include/linux/kernel.h linux-4.4.62/include/linux/kernel.h
  11373. --- linux-4.4.62.orig/include/linux/kernel.h 2017-04-18 07:15:37.000000000 +0200
  11374. +++ linux-4.4.62/include/linux/kernel.h 2017-04-18 17:38:08.126646608 +0200
  11375. @@ -188,6 +188,9 @@
  11376. */
  11377. # define might_sleep() \
  11378. do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
  11379. +
  11380. +# define might_sleep_no_state_check() \
  11381. + do { ___might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
  11382. # define sched_annotate_sleep() (current->task_state_change = 0)
  11383. #else
  11384. static inline void ___might_sleep(const char *file, int line,
  11385. @@ -195,6 +198,7 @@
  11386. static inline void __might_sleep(const char *file, int line,
  11387. int preempt_offset) { }
  11388. # define might_sleep() do { might_resched(); } while (0)
  11389. +# define might_sleep_no_state_check() do { might_resched(); } while (0)
  11390. # define sched_annotate_sleep() do { } while (0)
  11391. #endif
  11392. @@ -255,6 +259,7 @@
  11393. __printf(1, 2)
  11394. void panic(const char *fmt, ...)
  11395. __noreturn __cold;
  11396. +void nmi_panic(struct pt_regs *regs, const char *msg);
  11397. extern void oops_enter(void);
  11398. extern void oops_exit(void);
  11399. void print_oops_end_marker(void);
  11400. @@ -448,6 +453,14 @@
  11401. extern bool crash_kexec_post_notifiers;
  11402. /*
  11403. + * panic_cpu is used for synchronizing panic() and crash_kexec() execution. It
  11404. + * holds a CPU number which is executing panic() currently. A value of
  11405. + * PANIC_CPU_INVALID means no CPU has entered panic() or crash_kexec().
  11406. + */
  11407. +extern atomic_t panic_cpu;
  11408. +#define PANIC_CPU_INVALID -1
  11409. +
  11410. +/*
  11411. * Only to be used by arch init code. If the user over-wrote the default
  11412. * CONFIG_PANIC_TIMEOUT, honor it.
  11413. */
  11414. @@ -475,6 +488,7 @@
  11415. SYSTEM_HALT,
  11416. SYSTEM_POWER_OFF,
  11417. SYSTEM_RESTART,
  11418. + SYSTEM_SUSPEND,
  11419. } system_state;
  11420. #define TAINT_PROPRIETARY_MODULE 0
  11421. diff -Nur linux-4.4.62.orig/include/linux/kvm_host.h linux-4.4.62/include/linux/kvm_host.h
  11422. --- linux-4.4.62.orig/include/linux/kvm_host.h 2017-04-18 07:15:37.000000000 +0200
  11423. +++ linux-4.4.62/include/linux/kvm_host.h 2017-04-18 17:38:08.126646608 +0200
  11424. @@ -25,6 +25,7 @@
  11425. #include <linux/irqflags.h>
  11426. #include <linux/context_tracking.h>
  11427. #include <linux/irqbypass.h>
  11428. +#include <linux/swait.h>
  11429. #include <asm/signal.h>
  11430. #include <linux/kvm.h>
  11431. @@ -243,7 +244,7 @@
  11432. int fpu_active;
  11433. int guest_fpu_loaded, guest_xcr0_loaded;
  11434. unsigned char fpu_counter;
  11435. - wait_queue_head_t wq;
  11436. + struct swait_queue_head wq;
  11437. struct pid *pid;
  11438. int sigset_active;
  11439. sigset_t sigset;
  11440. @@ -794,7 +795,7 @@
  11441. }
  11442. #endif
  11443. -static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
  11444. +static inline struct swait_queue_head *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
  11445. {
  11446. #ifdef __KVM_HAVE_ARCH_WQP
  11447. return vcpu->arch.wqp;
  11448. diff -Nur linux-4.4.62.orig/include/linux/lglock.h linux-4.4.62/include/linux/lglock.h
  11449. --- linux-4.4.62.orig/include/linux/lglock.h 2017-04-18 07:15:37.000000000 +0200
  11450. +++ linux-4.4.62/include/linux/lglock.h 2017-04-18 17:38:08.126646608 +0200
  11451. @@ -34,13 +34,30 @@
  11452. #endif
  11453. struct lglock {
  11454. +#ifdef CONFIG_PREEMPT_RT_FULL
  11455. + struct rt_mutex __percpu *lock;
  11456. +#else
  11457. arch_spinlock_t __percpu *lock;
  11458. +#endif
  11459. #ifdef CONFIG_DEBUG_LOCK_ALLOC
  11460. struct lock_class_key lock_key;
  11461. struct lockdep_map lock_dep_map;
  11462. #endif
  11463. };
  11464. +#ifdef CONFIG_PREEMPT_RT_FULL
  11465. +# define DEFINE_LGLOCK(name) \
  11466. + static DEFINE_PER_CPU(struct rt_mutex, name ## _lock) \
  11467. + = __RT_MUTEX_INITIALIZER( name ## _lock); \
  11468. + struct lglock name = { .lock = &name ## _lock }
  11469. +
  11470. +# define DEFINE_STATIC_LGLOCK(name) \
  11471. + static DEFINE_PER_CPU(struct rt_mutex, name ## _lock) \
  11472. + = __RT_MUTEX_INITIALIZER( name ## _lock); \
  11473. + static struct lglock name = { .lock = &name ## _lock }
  11474. +
  11475. +#else
  11476. +
  11477. #define DEFINE_LGLOCK(name) \
  11478. static DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) \
  11479. = __ARCH_SPIN_LOCK_UNLOCKED; \
  11480. @@ -50,6 +67,7 @@
  11481. static DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) \
  11482. = __ARCH_SPIN_LOCK_UNLOCKED; \
  11483. static struct lglock name = { .lock = &name ## _lock }
  11484. +#endif
  11485. void lg_lock_init(struct lglock *lg, char *name);
  11486. @@ -64,6 +82,12 @@
  11487. void lg_global_lock(struct lglock *lg);
  11488. void lg_global_unlock(struct lglock *lg);
  11489. +#ifndef CONFIG_PREEMPT_RT_FULL
  11490. +#define lg_global_trylock_relax(name) lg_global_lock(name)
  11491. +#else
  11492. +void lg_global_trylock_relax(struct lglock *lg);
  11493. +#endif
  11494. +
  11495. #else
  11496. /* When !CONFIG_SMP, map lglock to spinlock */
  11497. #define lglock spinlock
  11498. diff -Nur linux-4.4.62.orig/include/linux/list_bl.h linux-4.4.62/include/linux/list_bl.h
  11499. --- linux-4.4.62.orig/include/linux/list_bl.h 2017-04-18 07:15:37.000000000 +0200
  11500. +++ linux-4.4.62/include/linux/list_bl.h 2017-04-18 17:38:08.126646608 +0200
  11501. @@ -2,6 +2,7 @@
  11502. #define _LINUX_LIST_BL_H
  11503. #include <linux/list.h>
  11504. +#include <linux/spinlock.h>
  11505. #include <linux/bit_spinlock.h>
  11506. /*
  11507. @@ -32,13 +33,24 @@
  11508. struct hlist_bl_head {
  11509. struct hlist_bl_node *first;
  11510. +#ifdef CONFIG_PREEMPT_RT_BASE
  11511. + raw_spinlock_t lock;
  11512. +#endif
  11513. };
  11514. struct hlist_bl_node {
  11515. struct hlist_bl_node *next, **pprev;
  11516. };
  11517. -#define INIT_HLIST_BL_HEAD(ptr) \
  11518. - ((ptr)->first = NULL)
  11519. +
  11520. +#ifdef CONFIG_PREEMPT_RT_BASE
  11521. +#define INIT_HLIST_BL_HEAD(h) \
  11522. +do { \
  11523. + (h)->first = NULL; \
  11524. + raw_spin_lock_init(&(h)->lock); \
  11525. +} while (0)
  11526. +#else
  11527. +#define INIT_HLIST_BL_HEAD(h) (h)->first = NULL
  11528. +#endif
  11529. static inline void INIT_HLIST_BL_NODE(struct hlist_bl_node *h)
  11530. {
  11531. @@ -118,12 +130,26 @@
  11532. static inline void hlist_bl_lock(struct hlist_bl_head *b)
  11533. {
  11534. +#ifndef CONFIG_PREEMPT_RT_BASE
  11535. bit_spin_lock(0, (unsigned long *)b);
  11536. +#else
  11537. + raw_spin_lock(&b->lock);
  11538. +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
  11539. + __set_bit(0, (unsigned long *)b);
  11540. +#endif
  11541. +#endif
  11542. }
  11543. static inline void hlist_bl_unlock(struct hlist_bl_head *b)
  11544. {
  11545. +#ifndef CONFIG_PREEMPT_RT_BASE
  11546. __bit_spin_unlock(0, (unsigned long *)b);
  11547. +#else
  11548. +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
  11549. + __clear_bit(0, (unsigned long *)b);
  11550. +#endif
  11551. + raw_spin_unlock(&b->lock);
  11552. +#endif
  11553. }
  11554. static inline bool hlist_bl_is_locked(struct hlist_bl_head *b)
  11555. diff -Nur linux-4.4.62.orig/include/linux/locallock.h linux-4.4.62/include/linux/locallock.h
  11556. --- linux-4.4.62.orig/include/linux/locallock.h 1970-01-01 01:00:00.000000000 +0100
  11557. +++ linux-4.4.62/include/linux/locallock.h 2017-04-18 17:38:08.126646608 +0200
  11558. @@ -0,0 +1,276 @@
  11559. +#ifndef _LINUX_LOCALLOCK_H
  11560. +#define _LINUX_LOCALLOCK_H
  11561. +
  11562. +#include <linux/percpu.h>
  11563. +#include <linux/spinlock.h>
  11564. +
  11565. +#ifdef CONFIG_PREEMPT_RT_BASE
  11566. +
  11567. +#ifdef CONFIG_DEBUG_SPINLOCK
  11568. +# define LL_WARN(cond) WARN_ON(cond)
  11569. +#else
  11570. +# define LL_WARN(cond) do { } while (0)
  11571. +#endif
  11572. +
  11573. +/*
  11574. + * per cpu lock based substitute for local_irq_*()
  11575. + */
  11576. +struct local_irq_lock {
  11577. + spinlock_t lock;
  11578. + struct task_struct *owner;
  11579. + int nestcnt;
  11580. + unsigned long flags;
  11581. +};
  11582. +
  11583. +#define DEFINE_LOCAL_IRQ_LOCK(lvar) \
  11584. + DEFINE_PER_CPU(struct local_irq_lock, lvar) = { \
  11585. + .lock = __SPIN_LOCK_UNLOCKED((lvar).lock) }
  11586. +
  11587. +#define DECLARE_LOCAL_IRQ_LOCK(lvar) \
  11588. + DECLARE_PER_CPU(struct local_irq_lock, lvar)
  11589. +
  11590. +#define local_irq_lock_init(lvar) \
  11591. + do { \
  11592. + int __cpu; \
  11593. + for_each_possible_cpu(__cpu) \
  11594. + spin_lock_init(&per_cpu(lvar, __cpu).lock); \
  11595. + } while (0)
  11596. +
  11597. +/*
  11598. + * spin_lock|trylock|unlock_local flavour that does not migrate disable
  11599. + * used for __local_lock|trylock|unlock where get_local_var/put_local_var
  11600. + * already takes care of the migrate_disable/enable
  11601. + * for CONFIG_PREEMPT_BASE map to the normal spin_* calls.
  11602. + */
  11603. +#ifdef CONFIG_PREEMPT_RT_FULL
  11604. +# define spin_lock_local(lock) rt_spin_lock__no_mg(lock)
  11605. +# define spin_trylock_local(lock) rt_spin_trylock__no_mg(lock)
  11606. +# define spin_unlock_local(lock) rt_spin_unlock__no_mg(lock)
  11607. +#else
  11608. +# define spin_lock_local(lock) spin_lock(lock)
  11609. +# define spin_trylock_local(lock) spin_trylock(lock)
  11610. +# define spin_unlock_local(lock) spin_unlock(lock)
  11611. +#endif
  11612. +
  11613. +static inline void __local_lock(struct local_irq_lock *lv)
  11614. +{
  11615. + if (lv->owner != current) {
  11616. + spin_lock_local(&lv->lock);
  11617. + LL_WARN(lv->owner);
  11618. + LL_WARN(lv->nestcnt);
  11619. + lv->owner = current;
  11620. + }
  11621. + lv->nestcnt++;
  11622. +}
  11623. +
  11624. +#define local_lock(lvar) \
  11625. + do { __local_lock(&get_local_var(lvar)); } while (0)
  11626. +
  11627. +#define local_lock_on(lvar, cpu) \
  11628. + do { __local_lock(&per_cpu(lvar, cpu)); } while (0)
  11629. +
  11630. +static inline int __local_trylock(struct local_irq_lock *lv)
  11631. +{
  11632. + if (lv->owner != current && spin_trylock_local(&lv->lock)) {
  11633. + LL_WARN(lv->owner);
  11634. + LL_WARN(lv->nestcnt);
  11635. + lv->owner = current;
  11636. + lv->nestcnt = 1;
  11637. + return 1;
  11638. + }
  11639. + return 0;
  11640. +}
  11641. +
  11642. +#define local_trylock(lvar) \
  11643. + ({ \
  11644. + int __locked; \
  11645. + __locked = __local_trylock(&get_local_var(lvar)); \
  11646. + if (!__locked) \
  11647. + put_local_var(lvar); \
  11648. + __locked; \
  11649. + })
  11650. +
  11651. +static inline void __local_unlock(struct local_irq_lock *lv)
  11652. +{
  11653. + LL_WARN(lv->nestcnt == 0);
  11654. + LL_WARN(lv->owner != current);
  11655. + if (--lv->nestcnt)
  11656. + return;
  11657. +
  11658. + lv->owner = NULL;
  11659. + spin_unlock_local(&lv->lock);
  11660. +}
  11661. +
  11662. +#define local_unlock(lvar) \
  11663. + do { \
  11664. + __local_unlock(this_cpu_ptr(&lvar)); \
  11665. + put_local_var(lvar); \
  11666. + } while (0)
  11667. +
  11668. +#define local_unlock_on(lvar, cpu) \
  11669. + do { __local_unlock(&per_cpu(lvar, cpu)); } while (0)
  11670. +
  11671. +static inline void __local_lock_irq(struct local_irq_lock *lv)
  11672. +{
  11673. + spin_lock_irqsave(&lv->lock, lv->flags);
  11674. + LL_WARN(lv->owner);
  11675. + LL_WARN(lv->nestcnt);
  11676. + lv->owner = current;
  11677. + lv->nestcnt = 1;
  11678. +}
  11679. +
  11680. +#define local_lock_irq(lvar) \
  11681. + do { __local_lock_irq(&get_local_var(lvar)); } while (0)
  11682. +
  11683. +#define local_lock_irq_on(lvar, cpu) \
  11684. + do { __local_lock_irq(&per_cpu(lvar, cpu)); } while (0)
  11685. +
  11686. +static inline void __local_unlock_irq(struct local_irq_lock *lv)
  11687. +{
  11688. + LL_WARN(!lv->nestcnt);
  11689. + LL_WARN(lv->owner != current);
  11690. + lv->owner = NULL;
  11691. + lv->nestcnt = 0;
  11692. + spin_unlock_irq(&lv->lock);
  11693. +}
  11694. +
  11695. +#define local_unlock_irq(lvar) \
  11696. + do { \
  11697. + __local_unlock_irq(this_cpu_ptr(&lvar)); \
  11698. + put_local_var(lvar); \
  11699. + } while (0)
  11700. +
  11701. +#define local_unlock_irq_on(lvar, cpu) \
  11702. + do { \
  11703. + __local_unlock_irq(&per_cpu(lvar, cpu)); \
  11704. + } while (0)
  11705. +
  11706. +static inline int __local_lock_irqsave(struct local_irq_lock *lv)
  11707. +{
  11708. + if (lv->owner != current) {
  11709. + __local_lock_irq(lv);
  11710. + return 0;
  11711. + } else {
  11712. + lv->nestcnt++;
  11713. + return 1;
  11714. + }
  11715. +}
  11716. +
  11717. +#define local_lock_irqsave(lvar, _flags) \
  11718. + do { \
  11719. + if (__local_lock_irqsave(&get_local_var(lvar))) \
  11720. + put_local_var(lvar); \
  11721. + _flags = __this_cpu_read(lvar.flags); \
  11722. + } while (0)
  11723. +
  11724. +#define local_lock_irqsave_on(lvar, _flags, cpu) \
  11725. + do { \
  11726. + __local_lock_irqsave(&per_cpu(lvar, cpu)); \
  11727. + _flags = per_cpu(lvar, cpu).flags; \
  11728. + } while (0)
  11729. +
  11730. +static inline int __local_unlock_irqrestore(struct local_irq_lock *lv,
  11731. + unsigned long flags)
  11732. +{
  11733. + LL_WARN(!lv->nestcnt);
  11734. + LL_WARN(lv->owner != current);
  11735. + if (--lv->nestcnt)
  11736. + return 0;
  11737. +
  11738. + lv->owner = NULL;
  11739. + spin_unlock_irqrestore(&lv->lock, lv->flags);
  11740. + return 1;
  11741. +}
  11742. +
  11743. +#define local_unlock_irqrestore(lvar, flags) \
  11744. + do { \
  11745. + if (__local_unlock_irqrestore(this_cpu_ptr(&lvar), flags)) \
  11746. + put_local_var(lvar); \
  11747. + } while (0)
  11748. +
  11749. +#define local_unlock_irqrestore_on(lvar, flags, cpu) \
  11750. + do { \
  11751. + __local_unlock_irqrestore(&per_cpu(lvar, cpu), flags); \
  11752. + } while (0)
  11753. +
  11754. +#define local_spin_trylock_irq(lvar, lock) \
  11755. + ({ \
  11756. + int __locked; \
  11757. + local_lock_irq(lvar); \
  11758. + __locked = spin_trylock(lock); \
  11759. + if (!__locked) \
  11760. + local_unlock_irq(lvar); \
  11761. + __locked; \
  11762. + })
  11763. +
  11764. +#define local_spin_lock_irq(lvar, lock) \
  11765. + do { \
  11766. + local_lock_irq(lvar); \
  11767. + spin_lock(lock); \
  11768. + } while (0)
  11769. +
  11770. +#define local_spin_unlock_irq(lvar, lock) \
  11771. + do { \
  11772. + spin_unlock(lock); \
  11773. + local_unlock_irq(lvar); \
  11774. + } while (0)
  11775. +
  11776. +#define local_spin_lock_irqsave(lvar, lock, flags) \
  11777. + do { \
  11778. + local_lock_irqsave(lvar, flags); \
  11779. + spin_lock(lock); \
  11780. + } while (0)
  11781. +
  11782. +#define local_spin_unlock_irqrestore(lvar, lock, flags) \
  11783. + do { \
  11784. + spin_unlock(lock); \
  11785. + local_unlock_irqrestore(lvar, flags); \
  11786. + } while (0)
  11787. +
  11788. +#define get_locked_var(lvar, var) \
  11789. + (*({ \
  11790. + local_lock(lvar); \
  11791. + this_cpu_ptr(&var); \
  11792. + }))
  11793. +
  11794. +#define put_locked_var(lvar, var) local_unlock(lvar);
  11795. +
  11796. +#define local_lock_cpu(lvar) \
  11797. + ({ \
  11798. + local_lock(lvar); \
  11799. + smp_processor_id(); \
  11800. + })
  11801. +
  11802. +#define local_unlock_cpu(lvar) local_unlock(lvar)
  11803. +
  11804. +#else /* PREEMPT_RT_BASE */
  11805. +
  11806. +#define DEFINE_LOCAL_IRQ_LOCK(lvar) __typeof__(const int) lvar
  11807. +#define DECLARE_LOCAL_IRQ_LOCK(lvar) extern __typeof__(const int) lvar
  11808. +
  11809. +static inline void local_irq_lock_init(int lvar) { }
  11810. +
  11811. +#define local_lock(lvar) preempt_disable()
  11812. +#define local_unlock(lvar) preempt_enable()
  11813. +#define local_lock_irq(lvar) local_irq_disable()
  11814. +#define local_unlock_irq(lvar) local_irq_enable()
  11815. +#define local_lock_irqsave(lvar, flags) local_irq_save(flags)
  11816. +#define local_unlock_irqrestore(lvar, flags) local_irq_restore(flags)
  11817. +
  11818. +#define local_spin_trylock_irq(lvar, lock) spin_trylock_irq(lock)
  11819. +#define local_spin_lock_irq(lvar, lock) spin_lock_irq(lock)
  11820. +#define local_spin_unlock_irq(lvar, lock) spin_unlock_irq(lock)
  11821. +#define local_spin_lock_irqsave(lvar, lock, flags) \
  11822. + spin_lock_irqsave(lock, flags)
  11823. +#define local_spin_unlock_irqrestore(lvar, lock, flags) \
  11824. + spin_unlock_irqrestore(lock, flags)
  11825. +
  11826. +#define get_locked_var(lvar, var) get_cpu_var(var)
  11827. +#define put_locked_var(lvar, var) put_cpu_var(var)
  11828. +
  11829. +#define local_lock_cpu(lvar) get_cpu()
  11830. +#define local_unlock_cpu(lvar) put_cpu()
  11831. +
  11832. +#endif
  11833. +
  11834. +#endif
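The two halves of this header give local locks a single call-site API: with PREEMPT_RT_BASE each lock is a real per-CPU spinlock with owner and nesting tracking, while on non-RT kernels the same macros collapse into preempt/irq disabling. A minimal usage sketch under that assumption; the per-CPU structure and the lock name are illustrative, not part of the patch:

#include <linux/locallock.h>
#include <linux/percpu.h>

struct my_pcpu_cache {
	unsigned long hits;
};

static DEFINE_PER_CPU(struct my_pcpu_cache, my_cache);
static DEFINE_LOCAL_IRQ_LOCK(my_cache_lock);

static void my_cache_hit(void)
{
	struct my_pcpu_cache *c;

	/* get_locked_var() takes the local lock and returns this CPU's slot:
	 * a sleeping per-CPU spinlock on RT, preempt_disable() otherwise. */
	c = &get_locked_var(my_cache_lock, my_cache);
	c->hits++;
	put_locked_var(my_cache_lock, my_cache);
}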
  11835. diff -Nur linux-4.4.62.orig/include/linux/mm_types.h linux-4.4.62/include/linux/mm_types.h
  11836. --- linux-4.4.62.orig/include/linux/mm_types.h 2017-04-18 07:15:37.000000000 +0200
  11837. +++ linux-4.4.62/include/linux/mm_types.h 2017-04-18 17:38:08.126646608 +0200
  11838. @@ -11,6 +11,7 @@
  11839. #include <linux/completion.h>
  11840. #include <linux/cpumask.h>
  11841. #include <linux/uprobes.h>
  11842. +#include <linux/rcupdate.h>
  11843. #include <linux/page-flags-layout.h>
  11844. #include <asm/page.h>
  11845. #include <asm/mmu.h>
  11846. @@ -505,6 +506,9 @@
  11847. bool tlb_flush_pending;
  11848. #endif
  11849. struct uprobes_state uprobes_state;
  11850. +#ifdef CONFIG_PREEMPT_RT_BASE
  11851. + struct rcu_head delayed_drop;
  11852. +#endif
  11853. #ifdef CONFIG_X86_INTEL_MPX
  11854. /* address of the bounds directory */
  11855. void __user *bd_addr;
  11856. diff -Nur linux-4.4.62.orig/include/linux/module.h linux-4.4.62/include/linux/module.h
  11857. --- linux-4.4.62.orig/include/linux/module.h 2017-04-18 07:15:37.000000000 +0200
  11858. +++ linux-4.4.62/include/linux/module.h 2017-04-18 17:38:08.126646608 +0200
  11859. @@ -500,6 +500,7 @@
  11860. struct module *__module_text_address(unsigned long addr);
  11861. struct module *__module_address(unsigned long addr);
  11862. bool is_module_address(unsigned long addr);
  11863. +bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr);
  11864. bool is_module_percpu_address(unsigned long addr);
  11865. bool is_module_text_address(unsigned long addr);
  11866. @@ -664,6 +665,11 @@
  11867. {
  11868. return false;
  11869. }
  11870. +
  11871. +static inline bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr)
  11872. +{
  11873. + return false;
  11874. +}
  11875. static inline bool is_module_text_address(unsigned long addr)
  11876. {
  11877. diff -Nur linux-4.4.62.orig/include/linux/mutex.h linux-4.4.62/include/linux/mutex.h
  11878. --- linux-4.4.62.orig/include/linux/mutex.h 2017-04-18 07:15:37.000000000 +0200
  11879. +++ linux-4.4.62/include/linux/mutex.h 2017-04-18 17:38:08.126646608 +0200
  11880. @@ -19,6 +19,17 @@
  11881. #include <asm/processor.h>
  11882. #include <linux/osq_lock.h>
  11883. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  11884. +# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
  11885. + , .dep_map = { .name = #lockname }
  11886. +#else
  11887. +# define __DEP_MAP_MUTEX_INITIALIZER(lockname)
  11888. +#endif
  11889. +
  11890. +#ifdef CONFIG_PREEMPT_RT_FULL
  11891. +# include <linux/mutex_rt.h>
  11892. +#else
  11893. +
  11894. /*
  11895. * Simple, straightforward mutexes with strict semantics:
  11896. *
  11897. @@ -99,13 +110,6 @@
  11898. static inline void mutex_destroy(struct mutex *lock) {}
  11899. #endif
  11900. -#ifdef CONFIG_DEBUG_LOCK_ALLOC
  11901. -# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
  11902. - , .dep_map = { .name = #lockname }
  11903. -#else
  11904. -# define __DEP_MAP_MUTEX_INITIALIZER(lockname)
  11905. -#endif
  11906. -
  11907. #define __MUTEX_INITIALIZER(lockname) \
  11908. { .count = ATOMIC_INIT(1) \
  11909. , .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \
  11910. @@ -173,6 +177,8 @@
  11911. extern int mutex_trylock(struct mutex *lock);
  11912. extern void mutex_unlock(struct mutex *lock);
  11913. +#endif /* !PREEMPT_RT_FULL */
  11914. +
  11915. extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
  11916. #endif /* __LINUX_MUTEX_H */
  11917. diff -Nur linux-4.4.62.orig/include/linux/mutex_rt.h linux-4.4.62/include/linux/mutex_rt.h
  11918. --- linux-4.4.62.orig/include/linux/mutex_rt.h 1970-01-01 01:00:00.000000000 +0100
  11919. +++ linux-4.4.62/include/linux/mutex_rt.h 2017-04-18 17:38:08.194649247 +0200
  11920. @@ -0,0 +1,89 @@
  11921. +#ifndef __LINUX_MUTEX_RT_H
  11922. +#define __LINUX_MUTEX_RT_H
  11923. +
  11924. +#ifndef __LINUX_MUTEX_H
  11925. +#error "Please include mutex.h"
  11926. +#endif
  11927. +
  11928. +#include <linux/rtmutex.h>
  11929. +
  11930. +/* FIXME: Just for __lockfunc */
  11931. +#include <linux/spinlock.h>
  11932. +
  11933. +struct mutex {
  11934. + struct rt_mutex lock;
  11935. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  11936. + struct lockdep_map dep_map;
  11937. +#endif
  11938. +};
  11939. +
  11940. +#define __MUTEX_INITIALIZER(mutexname) \
  11941. + { \
  11942. + .lock = __RT_MUTEX_INITIALIZER(mutexname.lock) \
  11943. + __DEP_MAP_MUTEX_INITIALIZER(mutexname) \
  11944. + }
  11945. +
  11946. +#define DEFINE_MUTEX(mutexname) \
  11947. + struct mutex mutexname = __MUTEX_INITIALIZER(mutexname)
  11948. +
  11949. +extern void __mutex_do_init(struct mutex *lock, const char *name, struct lock_class_key *key);
  11950. +extern void __lockfunc _mutex_lock(struct mutex *lock);
  11951. +extern int __lockfunc _mutex_lock_interruptible(struct mutex *lock);
  11952. +extern int __lockfunc _mutex_lock_killable(struct mutex *lock);
  11953. +extern void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass);
  11954. +extern void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock);
  11955. +extern int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass);
  11956. +extern int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass);
  11957. +extern int __lockfunc _mutex_trylock(struct mutex *lock);
  11958. +extern void __lockfunc _mutex_unlock(struct mutex *lock);
  11959. +
  11960. +#define mutex_is_locked(l) rt_mutex_is_locked(&(l)->lock)
  11961. +#define mutex_lock(l) _mutex_lock(l)
  11962. +#define mutex_lock_interruptible(l) _mutex_lock_interruptible(l)
  11963. +#define mutex_lock_killable(l) _mutex_lock_killable(l)
  11964. +#define mutex_trylock(l) _mutex_trylock(l)
  11965. +#define mutex_unlock(l) _mutex_unlock(l)
  11966. +
  11967. +#ifdef CONFIG_DEBUG_MUTEXES
  11968. +#define mutex_destroy(l) rt_mutex_destroy(&(l)->lock)
  11969. +#else
  11970. +static inline void mutex_destroy(struct mutex *lock) {}
  11971. +#endif
  11972. +
  11973. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  11974. +# define mutex_lock_nested(l, s) _mutex_lock_nested(l, s)
  11975. +# define mutex_lock_interruptible_nested(l, s) \
  11976. + _mutex_lock_interruptible_nested(l, s)
  11977. +# define mutex_lock_killable_nested(l, s) \
  11978. + _mutex_lock_killable_nested(l, s)
  11979. +
  11980. +# define mutex_lock_nest_lock(lock, nest_lock) \
  11981. +do { \
  11982. + typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \
  11983. + _mutex_lock_nest_lock(lock, &(nest_lock)->dep_map); \
  11984. +} while (0)
  11985. +
  11986. +#else
  11987. +# define mutex_lock_nested(l, s) _mutex_lock(l)
  11988. +# define mutex_lock_interruptible_nested(l, s) \
  11989. + _mutex_lock_interruptible(l)
  11990. +# define mutex_lock_killable_nested(l, s) \
  11991. + _mutex_lock_killable(l)
  11992. +# define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock)
  11993. +#endif
  11994. +
  11995. +# define mutex_init(mutex) \
  11996. +do { \
  11997. + static struct lock_class_key __key; \
  11998. + \
  11999. + rt_mutex_init(&(mutex)->lock); \
  12000. + __mutex_do_init((mutex), #mutex, &__key); \
  12001. +} while (0)
  12002. +
  12003. +# define __mutex_init(mutex, name, key) \
  12004. +do { \
  12005. + rt_mutex_init(&(mutex)->lock); \
  12006. + __mutex_do_init((mutex), name, key); \
  12007. +} while (0)
  12008. +
  12009. +#endif
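For mutex users nothing changes at the call site: under PREEMPT_RT_FULL the struct mutex above wraps an rt_mutex and the usual entry points are routed to the _mutex_*() functions. A short sketch, with the device structure purely illustrative, that builds the same way on both configurations:

#include <linux/mutex.h>

struct my_dev {
	struct mutex lock;
	int users;
};

static void my_dev_setup(struct my_dev *dev)
{
	/* On RT this expands to rt_mutex_init() + __mutex_do_init(). */
	mutex_init(&dev->lock);
	dev->users = 0;
}

static void my_dev_open(struct my_dev *dev)
{
	mutex_lock(&dev->lock);		/* _mutex_lock() -> rt_mutex on RT */
	dev->users++;
	mutex_unlock(&dev->lock);
}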
  12010. diff -Nur linux-4.4.62.orig/include/linux/netdevice.h linux-4.4.62/include/linux/netdevice.h
  12011. --- linux-4.4.62.orig/include/linux/netdevice.h 2017-04-18 07:15:37.000000000 +0200
  12012. +++ linux-4.4.62/include/linux/netdevice.h 2017-04-18 17:38:08.194649247 +0200
  12013. @@ -390,7 +390,19 @@
  12014. typedef rx_handler_result_t rx_handler_func_t(struct sk_buff **pskb);
  12015. void __napi_schedule(struct napi_struct *n);
  12016. +
  12017. +/*
  12018. + * When PREEMPT_RT_FULL is defined, all device interrupt handlers
  12019. + * run as threads, and they can also be preempted (without PREEMPT_RT
  12020. + * interrupt threads can not be preempted). Which means that calling
  12021. + * __napi_schedule_irqoff() from an interrupt handler can be preempted
  12022. + * and can corrupt the napi->poll_list.
  12023. + */
  12024. +#ifdef CONFIG_PREEMPT_RT_FULL
  12025. +#define __napi_schedule_irqoff(n) __napi_schedule(n)
  12026. +#else
  12027. void __napi_schedule_irqoff(struct napi_struct *n);
  12028. +#endif
  12029. static inline bool napi_disable_pending(struct napi_struct *n)
  12030. {
  12031. @@ -2288,11 +2300,20 @@
  12032. void synchronize_net(void);
  12033. int init_dummy_netdev(struct net_device *dev);
  12034. +#ifdef CONFIG_PREEMPT_RT_FULL
  12035. +static inline int dev_recursion_level(void)
  12036. +{
  12037. + return current->xmit_recursion;
  12038. +}
  12039. +
  12040. +#else
  12041. +
  12042. DECLARE_PER_CPU(int, xmit_recursion);
  12043. static inline int dev_recursion_level(void)
  12044. {
  12045. return this_cpu_read(xmit_recursion);
  12046. }
  12047. +#endif
  12048. struct net_device *dev_get_by_index(struct net *net, int ifindex);
  12049. struct net_device *__dev_get_by_index(struct net *net, int ifindex);
  12050. @@ -2610,6 +2631,7 @@
  12051. unsigned int dropped;
  12052. struct sk_buff_head input_pkt_queue;
  12053. struct napi_struct backlog;
  12054. + struct sk_buff_head tofree_queue;
  12055. };
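The comment above is the whole story for drivers: with threaded, preemptible interrupt handlers the "irqoff" NAPI variant is no longer safe, so on RT it is simply aliased to __napi_schedule(). A hedged sketch of a handler that keeps using the same call on both configurations; the NIC structure and handler name are illustrative:

#include <linux/interrupt.h>
#include <linux/netdevice.h>

struct my_nic {
	struct napi_struct napi;
};

static irqreturn_t my_nic_irq(int irq, void *data)
{
	struct my_nic *nic = data;

	/* On RT this handler runs as a preemptible thread, so
	 * __napi_schedule_irqoff() expands to __napi_schedule(),
	 * which protects napi->poll_list properly. */
	if (napi_schedule_prep(&nic->napi))
		__napi_schedule_irqoff(&nic->napi);

	return IRQ_HANDLED;
}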
  12056. diff -Nur linux-4.4.62.orig/include/linux/netfilter/x_tables.h linux-4.4.62/include/linux/netfilter/x_tables.h
  12057. --- linux-4.4.62.orig/include/linux/netfilter/x_tables.h 2017-04-18 07:15:37.000000000 +0200
  12058. +++ linux-4.4.62/include/linux/netfilter/x_tables.h 2017-04-18 17:38:08.194649247 +0200
  12059. @@ -4,6 +4,7 @@
  12060. #include <linux/netdevice.h>
  12061. #include <linux/static_key.h>
  12062. +#include <linux/locallock.h>
  12063. #include <uapi/linux/netfilter/x_tables.h>
  12064. /**
  12065. @@ -289,6 +290,8 @@
  12066. */
  12067. DECLARE_PER_CPU(seqcount_t, xt_recseq);
  12068. +DECLARE_LOCAL_IRQ_LOCK(xt_write_lock);
  12069. +
  12070. /* xt_tee_enabled - true if x_tables needs to handle reentrancy
  12071. *
  12072. * Enabled if current ip(6)tables ruleset has at least one -j TEE rule.
  12073. @@ -309,6 +312,9 @@
  12074. {
  12075. unsigned int addend;
  12076. + /* RT protection */
  12077. + local_lock(xt_write_lock);
  12078. +
  12079. /*
  12080. * Low order bit of sequence is set if we already
  12081. * called xt_write_recseq_begin().
  12082. @@ -339,6 +345,7 @@
  12083. /* this is kind of a write_seqcount_end(), but addend is 0 or 1 */
  12084. smp_wmb();
  12085. __this_cpu_add(xt_recseq.sequence, addend);
  12086. + local_unlock(xt_write_lock);
  12087. }
  12088. /*
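The net effect of these two hunks is that the xt_recseq counter is only ever advanced with the per-CPU xt_write_lock held, so a writer that sleeps on RT cannot collide with another writer on the same CPU. A sketch of the resulting begin/end pairing as it would appear in a rule-traversal path; the function is illustrative, not taken from the patch:

static void my_table_walk(void)
{
	unsigned int addend;

	local_bh_disable();

	/* On RT this also takes xt_write_lock (a local lock) before the
	 * sequence counter is touched; on non-RT kernels local_lock()
	 * degrades to preempt_disable(). */
	addend = xt_write_recseq_begin();

	/* ... walk the ruleset, update per-CPU counters ... */

	xt_write_recseq_end(addend);	/* drops xt_write_lock again on RT */

	local_bh_enable();
}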
  12089. diff -Nur linux-4.4.62.orig/include/linux/notifier.h linux-4.4.62/include/linux/notifier.h
  12090. --- linux-4.4.62.orig/include/linux/notifier.h 2017-04-18 07:15:37.000000000 +0200
  12091. +++ linux-4.4.62/include/linux/notifier.h 2017-04-18 17:38:08.194649247 +0200
  12092. @@ -6,7 +6,7 @@
  12093. *
  12094. * Alan Cox <Alan.Cox@linux.org>
  12095. */
  12096. -
  12097. +
  12098. #ifndef _LINUX_NOTIFIER_H
  12099. #define _LINUX_NOTIFIER_H
  12100. #include <linux/errno.h>
  12101. @@ -42,9 +42,7 @@
  12102. * in srcu_notifier_call_chain(): no cache bounces and no memory barriers.
  12103. * As compensation, srcu_notifier_chain_unregister() is rather expensive.
  12104. * SRCU notifier chains should be used when the chain will be called very
  12105. - * often but notifier_blocks will seldom be removed. Also, SRCU notifier
  12106. - * chains are slightly more difficult to use because they require special
  12107. - * runtime initialization.
  12108. + * often but notifier_blocks will seldom be removed.
  12109. */
  12110. typedef int (*notifier_fn_t)(struct notifier_block *nb,
  12111. @@ -88,7 +86,7 @@
  12112. (name)->head = NULL; \
  12113. } while (0)
  12114. -/* srcu_notifier_heads must be initialized and cleaned up dynamically */
  12115. +/* srcu_notifier_heads must be cleaned up dynamically */
  12116. extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
  12117. #define srcu_cleanup_notifier_head(name) \
  12118. cleanup_srcu_struct(&(name)->srcu);
  12119. @@ -101,7 +99,13 @@
  12120. .head = NULL }
  12121. #define RAW_NOTIFIER_INIT(name) { \
  12122. .head = NULL }
  12123. -/* srcu_notifier_heads cannot be initialized statically */
  12124. +
  12125. +#define SRCU_NOTIFIER_INIT(name, pcpu) \
  12126. + { \
  12127. + .mutex = __MUTEX_INITIALIZER(name.mutex), \
  12128. + .head = NULL, \
  12129. + .srcu = __SRCU_STRUCT_INIT(name.srcu, pcpu), \
  12130. + }
  12131. #define ATOMIC_NOTIFIER_HEAD(name) \
  12132. struct atomic_notifier_head name = \
  12133. @@ -113,6 +117,18 @@
  12134. struct raw_notifier_head name = \
  12135. RAW_NOTIFIER_INIT(name)
  12136. +#define _SRCU_NOTIFIER_HEAD(name, mod) \
  12137. + static DEFINE_PER_CPU(struct srcu_struct_array, \
  12138. + name##_head_srcu_array); \
  12139. + mod struct srcu_notifier_head name = \
  12140. + SRCU_NOTIFIER_INIT(name, name##_head_srcu_array)
  12141. +
  12142. +#define SRCU_NOTIFIER_HEAD(name) \
  12143. + _SRCU_NOTIFIER_HEAD(name, )
  12144. +
  12145. +#define SRCU_NOTIFIER_HEAD_STATIC(name) \
  12146. + _SRCU_NOTIFIER_HEAD(name, static)
  12147. +
  12148. #ifdef __KERNEL__
  12149. extern int atomic_notifier_chain_register(struct atomic_notifier_head *nh,
  12150. @@ -182,12 +198,12 @@
  12151. /*
  12152. * Declared notifiers so far. I can imagine quite a few more chains
  12153. - * over time (eg laptop power reset chains, reboot chain (to clean
  12154. + * over time (eg laptop power reset chains, reboot chain (to clean
  12155. * device units up), device [un]mount chain, module load/unload chain,
  12156. - * low memory chain, screenblank chain (for plug in modular screenblankers)
  12157. + * low memory chain, screenblank chain (for plug in modular screenblankers)
  12158. * VC switch chains (for loadable kernel svgalib VC switch helpers) etc...
  12159. */
  12160. -
  12161. +
  12162. /* CPU notfiers are defined in include/linux/cpu.h. */
  12163. /* netdevice notifiers are defined in include/linux/netdevice.h */
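With a static initializer available, an SRCU notifier chain no longer needs srcu_init_notifier_head() at runtime; the macro allocates the per-CPU srcu array itself. A minimal sketch of the new definition style, with chain and callback names chosen for illustration:

static SRCU_NOTIFIER_HEAD(my_chain);

static int my_event_cb(struct notifier_block *nb, unsigned long action,
		       void *data)
{
	return NOTIFY_OK;
}

static struct notifier_block my_nb = {
	.notifier_call = my_event_cb,
};

static int __init my_chain_init(void)
{
	/* No runtime srcu_init_notifier_head() call is needed any more. */
	return srcu_notifier_chain_register(&my_chain, &my_nb);
}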
  12164. diff -Nur linux-4.4.62.orig/include/linux/percpu.h linux-4.4.62/include/linux/percpu.h
  12165. --- linux-4.4.62.orig/include/linux/percpu.h 2017-04-18 07:15:37.000000000 +0200
  12166. +++ linux-4.4.62/include/linux/percpu.h 2017-04-18 17:38:08.194649247 +0200
  12167. @@ -24,6 +24,35 @@
  12168. PERCPU_MODULE_RESERVE)
  12169. #endif
  12170. +#ifdef CONFIG_PREEMPT_RT_FULL
  12171. +
  12172. +#define get_local_var(var) (*({ \
  12173. + migrate_disable(); \
  12174. + this_cpu_ptr(&var); }))
  12175. +
  12176. +#define put_local_var(var) do { \
  12177. + (void)&(var); \
  12178. + migrate_enable(); \
  12179. +} while (0)
  12180. +
  12181. +# define get_local_ptr(var) ({ \
  12182. + migrate_disable(); \
  12183. + this_cpu_ptr(var); })
  12184. +
  12185. +# define put_local_ptr(var) do { \
  12186. + (void)(var); \
  12187. + migrate_enable(); \
  12188. +} while (0)
  12189. +
  12190. +#else
  12191. +
  12192. +#define get_local_var(var) get_cpu_var(var)
  12193. +#define put_local_var(var) put_cpu_var(var)
  12194. +#define get_local_ptr(var) get_cpu_ptr(var)
  12195. +#define put_local_ptr(var) put_cpu_ptr(var)
  12196. +
  12197. +#endif
  12198. +
  12199. /* minimum unit size, also is the maximum supported allocation size */
  12200. #define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10)
  12201. @@ -116,6 +145,7 @@
  12202. #endif
  12203. extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align);
  12204. +extern bool __is_kernel_percpu_address(unsigned long addr, unsigned long *can_addr);
  12205. extern bool is_kernel_percpu_address(unsigned long addr);
  12206. #if !defined(CONFIG_SMP) || !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA)
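get_local_var()/put_local_var() are the RT-aware counterparts of get_cpu_var()/put_cpu_var(): under PREEMPT_RT_FULL they only disable migration, so the section stays preemptible and may sleep, while on other kernels they fall back to the classic preemption-disabling helpers. A brief sketch; the per-CPU counter is illustrative:

static DEFINE_PER_CPU(unsigned long, my_events);

static void note_event(void)
{
	unsigned long *cnt;

	/* Pinned to this CPU, but still preemptible on RT. */
	cnt = &get_local_var(my_events);
	(*cnt)++;
	put_local_var(my_events);
}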
  12207. diff -Nur linux-4.4.62.orig/include/linux/pid.h linux-4.4.62/include/linux/pid.h
  12208. --- linux-4.4.62.orig/include/linux/pid.h 2017-04-18 07:15:37.000000000 +0200
  12209. +++ linux-4.4.62/include/linux/pid.h 2017-04-18 17:38:08.194649247 +0200
  12210. @@ -2,6 +2,7 @@
  12211. #define _LINUX_PID_H
  12212. #include <linux/rcupdate.h>
  12213. +#include <linux/atomic.h>
  12214. enum pid_type
  12215. {
  12216. diff -Nur linux-4.4.62.orig/include/linux/preempt.h linux-4.4.62/include/linux/preempt.h
  12217. --- linux-4.4.62.orig/include/linux/preempt.h 2017-04-18 07:15:37.000000000 +0200
  12218. +++ linux-4.4.62/include/linux/preempt.h 2017-04-18 17:38:08.194649247 +0200
  12219. @@ -50,7 +50,11 @@
  12220. #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT)
  12221. #define NMI_OFFSET (1UL << NMI_SHIFT)
  12222. -#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)
  12223. +#ifndef CONFIG_PREEMPT_RT_FULL
  12224. +# define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)
  12225. +#else
  12226. +# define SOFTIRQ_DISABLE_OFFSET (0)
  12227. +#endif
  12228. /* We use the MSB mostly because its available */
  12229. #define PREEMPT_NEED_RESCHED 0x80000000
  12230. @@ -59,9 +63,15 @@
  12231. #include <asm/preempt.h>
  12232. #define hardirq_count() (preempt_count() & HARDIRQ_MASK)
  12233. -#define softirq_count() (preempt_count() & SOFTIRQ_MASK)
  12234. #define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
  12235. | NMI_MASK))
  12236. +#ifndef CONFIG_PREEMPT_RT_FULL
  12237. +# define softirq_count() (preempt_count() & SOFTIRQ_MASK)
  12238. +# define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
  12239. +#else
  12240. +# define softirq_count() (0UL)
  12241. +extern int in_serving_softirq(void);
  12242. +#endif
  12243. /*
  12244. * Are we doing bottom half or hardware interrupt processing?
  12245. @@ -72,7 +82,6 @@
  12246. #define in_irq() (hardirq_count())
  12247. #define in_softirq() (softirq_count())
  12248. #define in_interrupt() (irq_count())
  12249. -#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
  12250. /*
  12251. * Are we in NMI context?
  12252. @@ -91,7 +100,11 @@
  12253. /*
  12254. * The preempt_count offset after spin_lock()
  12255. */
  12256. +#if !defined(CONFIG_PREEMPT_RT_FULL)
  12257. #define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET
  12258. +#else
  12259. +#define PREEMPT_LOCK_OFFSET 0
  12260. +#endif
  12261. /*
  12262. * The preempt_count offset needed for things like:
  12263. @@ -140,6 +153,20 @@
  12264. #define preempt_count_inc() preempt_count_add(1)
  12265. #define preempt_count_dec() preempt_count_sub(1)
  12266. +#ifdef CONFIG_PREEMPT_LAZY
  12267. +#define add_preempt_lazy_count(val) do { preempt_lazy_count() += (val); } while (0)
  12268. +#define sub_preempt_lazy_count(val) do { preempt_lazy_count() -= (val); } while (0)
  12269. +#define inc_preempt_lazy_count() add_preempt_lazy_count(1)
  12270. +#define dec_preempt_lazy_count() sub_preempt_lazy_count(1)
  12271. +#define preempt_lazy_count() (current_thread_info()->preempt_lazy_count)
  12272. +#else
  12273. +#define add_preempt_lazy_count(val) do { } while (0)
  12274. +#define sub_preempt_lazy_count(val) do { } while (0)
  12275. +#define inc_preempt_lazy_count() do { } while (0)
  12276. +#define dec_preempt_lazy_count() do { } while (0)
  12277. +#define preempt_lazy_count() (0)
  12278. +#endif
  12279. +
  12280. #ifdef CONFIG_PREEMPT_COUNT
  12281. #define preempt_disable() \
  12282. @@ -148,13 +175,25 @@
  12283. barrier(); \
  12284. } while (0)
  12285. +#define preempt_lazy_disable() \
  12286. +do { \
  12287. + inc_preempt_lazy_count(); \
  12288. + barrier(); \
  12289. +} while (0)
  12290. +
  12291. #define sched_preempt_enable_no_resched() \
  12292. do { \
  12293. barrier(); \
  12294. preempt_count_dec(); \
  12295. } while (0)
  12296. -#define preempt_enable_no_resched() sched_preempt_enable_no_resched()
  12297. +#ifdef CONFIG_PREEMPT_RT_BASE
  12298. +# define preempt_enable_no_resched() sched_preempt_enable_no_resched()
  12299. +# define preempt_check_resched_rt() preempt_check_resched()
  12300. +#else
  12301. +# define preempt_enable_no_resched() preempt_enable()
  12302. +# define preempt_check_resched_rt() barrier();
  12303. +#endif
  12304. #define preemptible() (preempt_count() == 0 && !irqs_disabled())
  12305. @@ -179,6 +218,13 @@
  12306. __preempt_schedule(); \
  12307. } while (0)
  12308. +#define preempt_lazy_enable() \
  12309. +do { \
  12310. + dec_preempt_lazy_count(); \
  12311. + barrier(); \
  12312. + preempt_check_resched(); \
  12313. +} while (0)
  12314. +
  12315. #else /* !CONFIG_PREEMPT */
  12316. #define preempt_enable() \
  12317. do { \
  12318. @@ -224,6 +270,7 @@
  12319. #define preempt_disable_notrace() barrier()
  12320. #define preempt_enable_no_resched_notrace() barrier()
  12321. #define preempt_enable_notrace() barrier()
  12322. +#define preempt_check_resched_rt() barrier()
  12323. #define preemptible() 0
  12324. #endif /* CONFIG_PREEMPT_COUNT */
  12325. @@ -244,10 +291,31 @@
  12326. } while (0)
  12327. #define preempt_fold_need_resched() \
  12328. do { \
  12329. - if (tif_need_resched()) \
  12330. + if (tif_need_resched_now()) \
  12331. set_preempt_need_resched(); \
  12332. } while (0)
  12333. +#ifdef CONFIG_PREEMPT_RT_FULL
  12334. +# define preempt_disable_rt() preempt_disable()
  12335. +# define preempt_enable_rt() preempt_enable()
  12336. +# define preempt_disable_nort() barrier()
  12337. +# define preempt_enable_nort() barrier()
  12338. +# ifdef CONFIG_SMP
  12339. + extern void migrate_disable(void);
  12340. + extern void migrate_enable(void);
  12341. +# else /* CONFIG_SMP */
  12342. +# define migrate_disable() barrier()
  12343. +# define migrate_enable() barrier()
  12344. +# endif /* CONFIG_SMP */
  12345. +#else
  12346. +# define preempt_disable_rt() barrier()
  12347. +# define preempt_enable_rt() barrier()
  12348. +# define preempt_disable_nort() preempt_disable()
  12349. +# define preempt_enable_nort() preempt_enable()
  12350. +# define migrate_disable() preempt_disable()
  12351. +# define migrate_enable() preempt_enable()
  12352. +#endif
  12353. +
  12354. #ifdef CONFIG_PREEMPT_NOTIFIERS
  12355. struct preempt_notifier;
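migrate_disable()/migrate_enable() are the central new primitives here: on an SMP RT kernel they pin the current task to its CPU without disabling preemption, and on non-RT kernels they degrade to preempt_disable()/preempt_enable(). A sketch of the intended pattern, code that needs CPU-local stability yet may block on a sleeping lock under RT; the names are illustrative:

static DEFINE_PER_CPU(u64, my_stat);
static DEFINE_SPINLOCK(my_stat_lock);

static void update_stat(void)
{
	u64 *stat;

	migrate_disable();		/* stay on this CPU; still preemptible on RT */
	stat = this_cpu_ptr(&my_stat);

	spin_lock(&my_stat_lock);	/* a sleeping lock on RT, which is fine here */
	*stat += 1;
	spin_unlock(&my_stat_lock);

	migrate_enable();
}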
  12356. diff -Nur linux-4.4.62.orig/include/linux/printk.h linux-4.4.62/include/linux/printk.h
  12357. --- linux-4.4.62.orig/include/linux/printk.h 2017-04-18 07:15:37.000000000 +0200
  12358. +++ linux-4.4.62/include/linux/printk.h 2017-04-18 17:38:08.194649247 +0200
  12359. @@ -117,9 +117,11 @@
  12360. #ifdef CONFIG_EARLY_PRINTK
  12361. extern asmlinkage __printf(1, 2)
  12362. void early_printk(const char *fmt, ...);
  12363. +extern void printk_kill(void);
  12364. #else
  12365. static inline __printf(1, 2) __cold
  12366. void early_printk(const char *s, ...) { }
  12367. +static inline void printk_kill(void) { }
  12368. #endif
  12369. typedef __printf(1, 0) int (*printk_func_t)(const char *fmt, va_list args);
  12370. diff -Nur linux-4.4.62.orig/include/linux/radix-tree.h linux-4.4.62/include/linux/radix-tree.h
  12371. --- linux-4.4.62.orig/include/linux/radix-tree.h 2017-04-18 07:15:37.000000000 +0200
  12372. +++ linux-4.4.62/include/linux/radix-tree.h 2017-04-18 17:38:08.194649247 +0200
  12373. @@ -279,6 +279,8 @@
  12374. unsigned long first_index, unsigned int max_items);
  12375. int radix_tree_preload(gfp_t gfp_mask);
  12376. int radix_tree_maybe_preload(gfp_t gfp_mask);
  12377. +void radix_tree_preload_end(void);
  12378. +
  12379. void radix_tree_init(void);
  12380. void *radix_tree_tag_set(struct radix_tree_root *root,
  12381. unsigned long index, unsigned int tag);
  12382. @@ -301,11 +303,6 @@
  12383. int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag);
  12384. unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item);
  12385. -static inline void radix_tree_preload_end(void)
  12386. -{
  12387. - preempt_enable();
  12388. -}
  12389. -
  12390. /**
  12391. * struct radix_tree_iter - radix tree iterator state
  12392. *
  12393. diff -Nur linux-4.4.62.orig/include/linux/random.h linux-4.4.62/include/linux/random.h
  12394. --- linux-4.4.62.orig/include/linux/random.h 2017-04-18 07:15:37.000000000 +0200
  12395. +++ linux-4.4.62/include/linux/random.h 2017-04-18 17:38:08.194649247 +0200
  12396. @@ -20,7 +20,7 @@
  12397. extern void add_device_randomness(const void *, unsigned int);
  12398. extern void add_input_randomness(unsigned int type, unsigned int code,
  12399. unsigned int value);
  12400. -extern void add_interrupt_randomness(int irq, int irq_flags);
  12401. +extern void add_interrupt_randomness(int irq, int irq_flags, __u64 ip);
  12402. extern void get_random_bytes(void *buf, int nbytes);
  12403. extern int add_random_ready_callback(struct random_ready_callback *rdy);
  12404. diff -Nur linux-4.4.62.orig/include/linux/rbtree.h linux-4.4.62/include/linux/rbtree.h
  12405. --- linux-4.4.62.orig/include/linux/rbtree.h 2017-04-18 07:15:37.000000000 +0200
  12406. +++ linux-4.4.62/include/linux/rbtree.h 2017-04-18 17:38:08.194649247 +0200
  12407. @@ -31,7 +31,6 @@
  12408. #include <linux/kernel.h>
  12409. #include <linux/stddef.h>
  12410. -#include <linux/rcupdate.h>
  12411. struct rb_node {
  12412. unsigned long __rb_parent_color;
  12413. @@ -86,14 +85,8 @@
  12414. *rb_link = node;
  12415. }
  12416. -static inline void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent,
  12417. - struct rb_node **rb_link)
  12418. -{
  12419. - node->__rb_parent_color = (unsigned long)parent;
  12420. - node->rb_left = node->rb_right = NULL;
  12421. -
  12422. - rcu_assign_pointer(*rb_link, node);
  12423. -}
  12424. +void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent,
  12425. + struct rb_node **rb_link);
  12426. #define rb_entry_safe(ptr, type, member) \
  12427. ({ typeof(ptr) ____ptr = (ptr); \
  12428. diff -Nur linux-4.4.62.orig/include/linux/rcupdate.h linux-4.4.62/include/linux/rcupdate.h
  12429. --- linux-4.4.62.orig/include/linux/rcupdate.h 2017-04-18 07:15:37.000000000 +0200
  12430. +++ linux-4.4.62/include/linux/rcupdate.h 2017-04-18 17:38:08.194649247 +0200
  12431. @@ -169,6 +169,9 @@
  12432. #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
  12433. +#ifdef CONFIG_PREEMPT_RT_FULL
  12434. +#define call_rcu_bh call_rcu
  12435. +#else
  12436. /**
  12437. * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period.
  12438. * @head: structure to be used for queueing the RCU updates.
  12439. @@ -192,6 +195,7 @@
  12440. */
  12441. void call_rcu_bh(struct rcu_head *head,
  12442. rcu_callback_t func);
  12443. +#endif
  12444. /**
  12445. * call_rcu_sched() - Queue an RCU for invocation after sched grace period.
  12446. @@ -292,6 +296,11 @@
  12447. * types of kernel builds, the rcu_read_lock() nesting depth is unknowable.
  12448. */
  12449. #define rcu_preempt_depth() (current->rcu_read_lock_nesting)
  12450. +#ifndef CONFIG_PREEMPT_RT_FULL
  12451. +#define sched_rcu_preempt_depth() rcu_preempt_depth()
  12452. +#else
  12453. +static inline int sched_rcu_preempt_depth(void) { return 0; }
  12454. +#endif
  12455. #else /* #ifdef CONFIG_PREEMPT_RCU */
  12456. @@ -317,6 +326,8 @@
  12457. return 0;
  12458. }
  12459. +#define sched_rcu_preempt_depth() rcu_preempt_depth()
  12460. +
  12461. #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
  12462. /* Internal to kernel */
  12463. @@ -489,7 +500,14 @@
  12464. int debug_lockdep_rcu_enabled(void);
  12465. int rcu_read_lock_held(void);
  12466. +#ifdef CONFIG_PREEMPT_RT_FULL
  12467. +static inline int rcu_read_lock_bh_held(void)
  12468. +{
  12469. + return rcu_read_lock_held();
  12470. +}
  12471. +#else
  12472. int rcu_read_lock_bh_held(void);
  12473. +#endif
  12474. /**
  12475. * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section?
  12476. @@ -937,10 +955,14 @@
  12477. static inline void rcu_read_lock_bh(void)
  12478. {
  12479. local_bh_disable();
  12480. +#ifdef CONFIG_PREEMPT_RT_FULL
  12481. + rcu_read_lock();
  12482. +#else
  12483. __acquire(RCU_BH);
  12484. rcu_lock_acquire(&rcu_bh_lock_map);
  12485. RCU_LOCKDEP_WARN(!rcu_is_watching(),
  12486. "rcu_read_lock_bh() used illegally while idle");
  12487. +#endif
  12488. }
  12489. /*
  12490. @@ -950,10 +972,14 @@
  12491. */
  12492. static inline void rcu_read_unlock_bh(void)
  12493. {
  12494. +#ifdef CONFIG_PREEMPT_RT_FULL
  12495. + rcu_read_unlock();
  12496. +#else
  12497. RCU_LOCKDEP_WARN(!rcu_is_watching(),
  12498. "rcu_read_unlock_bh() used illegally while idle");
  12499. rcu_lock_release(&rcu_bh_lock_map);
  12500. __release(RCU_BH);
  12501. +#endif
  12502. local_bh_enable();
  12503. }
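On RT the BH flavour of RCU is folded into preemptible RCU: call_rcu_bh() becomes call_rcu(), and rcu_read_lock_bh()/rcu_read_unlock_bh() wrap an ordinary RCU read-side section inside local_bh_disable()/local_bh_enable(). Existing readers therefore keep working unchanged, as in this illustrative sketch:

struct my_entry {
	int value;
};

static struct my_entry __rcu *my_entry_ptr;

static int read_entry_value(void)
{
	struct my_entry *e;
	int val = 0;

	rcu_read_lock_bh();		/* on RT: local_bh_disable() + rcu_read_lock() */
	e = rcu_dereference_bh(my_entry_ptr);
	if (e)
		val = e->value;
	rcu_read_unlock_bh();		/* on RT: rcu_read_unlock() + local_bh_enable() */

	return val;
}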
  12504. diff -Nur linux-4.4.62.orig/include/linux/rcutree.h linux-4.4.62/include/linux/rcutree.h
  12505. --- linux-4.4.62.orig/include/linux/rcutree.h 2017-04-18 07:15:37.000000000 +0200
  12506. +++ linux-4.4.62/include/linux/rcutree.h 2017-04-18 17:38:08.194649247 +0200
  12507. @@ -44,7 +44,11 @@
  12508. rcu_note_context_switch();
  12509. }
  12510. +#ifdef CONFIG_PREEMPT_RT_FULL
  12511. +# define synchronize_rcu_bh synchronize_rcu
  12512. +#else
  12513. void synchronize_rcu_bh(void);
  12514. +#endif
  12515. void synchronize_sched_expedited(void);
  12516. void synchronize_rcu_expedited(void);
  12517. @@ -72,7 +76,11 @@
  12518. }
  12519. void rcu_barrier(void);
  12520. +#ifdef CONFIG_PREEMPT_RT_FULL
  12521. +# define rcu_barrier_bh rcu_barrier
  12522. +#else
  12523. void rcu_barrier_bh(void);
  12524. +#endif
  12525. void rcu_barrier_sched(void);
  12526. unsigned long get_state_synchronize_rcu(void);
  12527. void cond_synchronize_rcu(unsigned long oldstate);
  12528. @@ -85,12 +93,10 @@
  12529. unsigned long rcu_batches_started_bh(void);
  12530. unsigned long rcu_batches_started_sched(void);
  12531. unsigned long rcu_batches_completed(void);
  12532. -unsigned long rcu_batches_completed_bh(void);
  12533. unsigned long rcu_batches_completed_sched(void);
  12534. void show_rcu_gp_kthreads(void);
  12535. void rcu_force_quiescent_state(void);
  12536. -void rcu_bh_force_quiescent_state(void);
  12537. void rcu_sched_force_quiescent_state(void);
  12538. void rcu_idle_enter(void);
  12539. @@ -105,6 +111,14 @@
  12540. bool rcu_is_watching(void);
  12541. +#ifndef CONFIG_PREEMPT_RT_FULL
  12542. +void rcu_bh_force_quiescent_state(void);
  12543. +unsigned long rcu_batches_completed_bh(void);
  12544. +#else
  12545. +# define rcu_bh_force_quiescent_state rcu_force_quiescent_state
  12546. +# define rcu_batches_completed_bh rcu_batches_completed
  12547. +#endif
  12548. +
  12549. void rcu_all_qs(void);
  12550. #endif /* __LINUX_RCUTREE_H */
  12551. diff -Nur linux-4.4.62.orig/include/linux/rtmutex.h linux-4.4.62/include/linux/rtmutex.h
  12552. --- linux-4.4.62.orig/include/linux/rtmutex.h 2017-04-18 07:15:37.000000000 +0200
  12553. +++ linux-4.4.62/include/linux/rtmutex.h 2017-04-18 17:38:08.194649247 +0200
  12554. @@ -13,11 +13,15 @@
  12555. #define __LINUX_RT_MUTEX_H
  12556. #include <linux/linkage.h>
  12557. +#include <linux/spinlock_types_raw.h>
  12558. #include <linux/rbtree.h>
  12559. -#include <linux/spinlock_types.h>
  12560. extern int max_lock_depth; /* for sysctl */
  12561. +#ifdef CONFIG_DEBUG_MUTEXES
  12562. +#include <linux/debug_locks.h>
  12563. +#endif
  12564. +
  12565. /**
  12566. * The rt_mutex structure
  12567. *
  12568. @@ -31,8 +35,8 @@
  12569. struct rb_root waiters;
  12570. struct rb_node *waiters_leftmost;
  12571. struct task_struct *owner;
  12572. -#ifdef CONFIG_DEBUG_RT_MUTEXES
  12573. int save_state;
  12574. +#ifdef CONFIG_DEBUG_RT_MUTEXES
  12575. const char *name, *file;
  12576. int line;
  12577. void *magic;
  12578. @@ -55,22 +59,33 @@
  12579. # define rt_mutex_debug_check_no_locks_held(task) do { } while (0)
  12580. #endif
  12581. +# define rt_mutex_init(mutex) \
  12582. + do { \
  12583. + raw_spin_lock_init(&(mutex)->wait_lock); \
  12584. + __rt_mutex_init(mutex, #mutex); \
  12585. + } while (0)
  12586. +
  12587. #ifdef CONFIG_DEBUG_RT_MUTEXES
  12588. # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \
  12589. , .name = #mutexname, .file = __FILE__, .line = __LINE__
  12590. -# define rt_mutex_init(mutex) __rt_mutex_init(mutex, __func__)
  12591. extern void rt_mutex_debug_task_free(struct task_struct *tsk);
  12592. #else
  12593. # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname)
  12594. -# define rt_mutex_init(mutex) __rt_mutex_init(mutex, NULL)
  12595. # define rt_mutex_debug_task_free(t) do { } while (0)
  12596. #endif
  12597. -#define __RT_MUTEX_INITIALIZER(mutexname) \
  12598. - { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
  12599. +#define __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \
  12600. + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
  12601. , .waiters = RB_ROOT \
  12602. , .owner = NULL \
  12603. - __DEBUG_RT_MUTEX_INITIALIZER(mutexname)}
  12604. + __DEBUG_RT_MUTEX_INITIALIZER(mutexname)
  12605. +
  12606. +#define __RT_MUTEX_INITIALIZER(mutexname) \
  12607. + { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) }
  12608. +
  12609. +#define __RT_MUTEX_INITIALIZER_SAVE_STATE(mutexname) \
  12610. + { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \
  12611. + , .save_state = 1 }
  12612. #define DEFINE_RT_MUTEX(mutexname) \
  12613. struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname)
  12614. @@ -91,6 +106,7 @@
  12615. extern void rt_mutex_lock(struct rt_mutex *lock);
  12616. extern int rt_mutex_lock_interruptible(struct rt_mutex *lock);
  12617. +extern int rt_mutex_lock_killable(struct rt_mutex *lock);
  12618. extern int rt_mutex_timed_lock(struct rt_mutex *lock,
  12619. struct hrtimer_sleeper *timeout);
  12620. diff -Nur linux-4.4.62.orig/include/linux/rwlock_rt.h linux-4.4.62/include/linux/rwlock_rt.h
  12621. --- linux-4.4.62.orig/include/linux/rwlock_rt.h 1970-01-01 01:00:00.000000000 +0100
  12622. +++ linux-4.4.62/include/linux/rwlock_rt.h 2017-04-18 17:38:08.194649247 +0200
  12623. @@ -0,0 +1,99 @@
  12624. +#ifndef __LINUX_RWLOCK_RT_H
  12625. +#define __LINUX_RWLOCK_RT_H
  12626. +
  12627. +#ifndef __LINUX_SPINLOCK_H
  12628. +#error Do not include directly. Use spinlock.h
  12629. +#endif
  12630. +
  12631. +#define rwlock_init(rwl) \
  12632. +do { \
  12633. + static struct lock_class_key __key; \
  12634. + \
  12635. + rt_mutex_init(&(rwl)->lock); \
  12636. + __rt_rwlock_init(rwl, #rwl, &__key); \
  12637. +} while (0)
  12638. +
  12639. +extern void __lockfunc rt_write_lock(rwlock_t *rwlock);
  12640. +extern void __lockfunc rt_read_lock(rwlock_t *rwlock);
  12641. +extern int __lockfunc rt_write_trylock(rwlock_t *rwlock);
  12642. +extern int __lockfunc rt_write_trylock_irqsave(rwlock_t *trylock, unsigned long *flags);
  12643. +extern int __lockfunc rt_read_trylock(rwlock_t *rwlock);
  12644. +extern void __lockfunc rt_write_unlock(rwlock_t *rwlock);
  12645. +extern void __lockfunc rt_read_unlock(rwlock_t *rwlock);
  12646. +extern unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock);
  12647. +extern unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock);
  12648. +extern void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key);
  12649. +
  12650. +#define read_trylock(lock) __cond_lock(lock, rt_read_trylock(lock))
  12651. +#define write_trylock(lock) __cond_lock(lock, rt_write_trylock(lock))
  12652. +
  12653. +#define write_trylock_irqsave(lock, flags) \
  12654. + __cond_lock(lock, rt_write_trylock_irqsave(lock, &flags))
  12655. +
  12656. +#define read_lock_irqsave(lock, flags) \
  12657. + do { \
  12658. + typecheck(unsigned long, flags); \
  12659. + flags = rt_read_lock_irqsave(lock); \
  12660. + } while (0)
  12661. +
  12662. +#define write_lock_irqsave(lock, flags) \
  12663. + do { \
  12664. + typecheck(unsigned long, flags); \
  12665. + flags = rt_write_lock_irqsave(lock); \
  12666. + } while (0)
  12667. +
  12668. +#define read_lock(lock) rt_read_lock(lock)
  12669. +
  12670. +#define read_lock_bh(lock) \
  12671. + do { \
  12672. + local_bh_disable(); \
  12673. + rt_read_lock(lock); \
  12674. + } while (0)
  12675. +
  12676. +#define read_lock_irq(lock) read_lock(lock)
  12677. +
  12678. +#define write_lock(lock) rt_write_lock(lock)
  12679. +
  12680. +#define write_lock_bh(lock) \
  12681. + do { \
  12682. + local_bh_disable(); \
  12683. + rt_write_lock(lock); \
  12684. + } while (0)
  12685. +
  12686. +#define write_lock_irq(lock) write_lock(lock)
  12687. +
  12688. +#define read_unlock(lock) rt_read_unlock(lock)
  12689. +
  12690. +#define read_unlock_bh(lock) \
  12691. + do { \
  12692. + rt_read_unlock(lock); \
  12693. + local_bh_enable(); \
  12694. + } while (0)
  12695. +
  12696. +#define read_unlock_irq(lock) read_unlock(lock)
  12697. +
  12698. +#define write_unlock(lock) rt_write_unlock(lock)
  12699. +
  12700. +#define write_unlock_bh(lock) \
  12701. + do { \
  12702. + rt_write_unlock(lock); \
  12703. + local_bh_enable(); \
  12704. + } while (0)
  12705. +
  12706. +#define write_unlock_irq(lock) write_unlock(lock)
  12707. +
  12708. +#define read_unlock_irqrestore(lock, flags) \
  12709. + do { \
  12710. + typecheck(unsigned long, flags); \
  12711. + (void) flags; \
  12712. + rt_read_unlock(lock); \
  12713. + } while (0)
  12714. +
  12715. +#define write_unlock_irqrestore(lock, flags) \
  12716. + do { \
  12717. + typecheck(unsigned long, flags); \
  12718. + (void) flags; \
  12719. + rt_write_unlock(lock); \
  12720. + } while (0)
  12721. +
  12722. +#endif
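As with mutexes, the rwlock API surface is preserved: read_lock()/write_lock() and their _irqsave variants map onto the rt_read_lock()/rt_write_lock() family, interrupts stay enabled, and the flags argument is only type-checked. A small usage sketch with illustrative names:

static DEFINE_RWLOCK(my_list_lock);
static LIST_HEAD(my_list);

static void my_list_add(struct list_head *new)
{
	unsigned long flags;

	/* On RT this is rt_write_lock_irqsave(): the lock may sleep and
	 * interrupts are not actually disabled. */
	write_lock_irqsave(&my_list_lock, flags);
	list_add(new, &my_list);
	write_unlock_irqrestore(&my_list_lock, flags);
}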
  12723. diff -Nur linux-4.4.62.orig/include/linux/rwlock_types.h linux-4.4.62/include/linux/rwlock_types.h
  12724. --- linux-4.4.62.orig/include/linux/rwlock_types.h 2017-04-18 07:15:37.000000000 +0200
  12725. +++ linux-4.4.62/include/linux/rwlock_types.h 2017-04-18 17:38:08.194649247 +0200
  12726. @@ -1,6 +1,10 @@
  12727. #ifndef __LINUX_RWLOCK_TYPES_H
  12728. #define __LINUX_RWLOCK_TYPES_H
  12729. +#if !defined(__LINUX_SPINLOCK_TYPES_H)
  12730. +# error "Do not include directly, include spinlock_types.h"
  12731. +#endif
  12732. +
  12733. /*
  12734. * include/linux/rwlock_types.h - generic rwlock type definitions
  12735. * and initializers
  12736. @@ -43,6 +47,7 @@
  12737. RW_DEP_MAP_INIT(lockname) }
  12738. #endif
  12739. -#define DEFINE_RWLOCK(x) rwlock_t x = __RW_LOCK_UNLOCKED(x)
  12740. +#define DEFINE_RWLOCK(name) \
  12741. + rwlock_t name __cacheline_aligned_in_smp = __RW_LOCK_UNLOCKED(name)
  12742. #endif /* __LINUX_RWLOCK_TYPES_H */
  12743. diff -Nur linux-4.4.62.orig/include/linux/rwlock_types_rt.h linux-4.4.62/include/linux/rwlock_types_rt.h
  12744. --- linux-4.4.62.orig/include/linux/rwlock_types_rt.h 1970-01-01 01:00:00.000000000 +0100
  12745. +++ linux-4.4.62/include/linux/rwlock_types_rt.h 2017-04-18 17:38:08.194649247 +0200
  12746. @@ -0,0 +1,33 @@
  12747. +#ifndef __LINUX_RWLOCK_TYPES_RT_H
  12748. +#define __LINUX_RWLOCK_TYPES_RT_H
  12749. +
  12750. +#ifndef __LINUX_SPINLOCK_TYPES_H
  12751. +#error "Do not include directly. Include spinlock_types.h instead"
  12752. +#endif
  12753. +
  12754. +/*
  12755. + * rwlocks - rtmutex which allows single reader recursion
  12756. + */
  12757. +typedef struct {
  12758. + struct rt_mutex lock;
  12759. + int read_depth;
  12760. + unsigned int break_lock;
  12761. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  12762. + struct lockdep_map dep_map;
  12763. +#endif
  12764. +} rwlock_t;
  12765. +
  12766. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  12767. +# define RW_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
  12768. +#else
  12769. +# define RW_DEP_MAP_INIT(lockname)
  12770. +#endif
  12771. +
  12772. +#define __RW_LOCK_UNLOCKED(name) \
  12773. + { .lock = __RT_MUTEX_INITIALIZER_SAVE_STATE(name.lock), \
  12774. + RW_DEP_MAP_INIT(name) }
  12775. +
  12776. +#define DEFINE_RWLOCK(name) \
  12777. + rwlock_t name __cacheline_aligned_in_smp = __RW_LOCK_UNLOCKED(name)
  12778. +
  12779. +#endif
  12780. diff -Nur linux-4.4.62.orig/include/linux/rwsem.h linux-4.4.62/include/linux/rwsem.h
  12781. --- linux-4.4.62.orig/include/linux/rwsem.h 2017-04-18 07:15:37.000000000 +0200
  12782. +++ linux-4.4.62/include/linux/rwsem.h 2017-04-18 17:38:08.198649401 +0200
  12783. @@ -18,6 +18,10 @@
  12784. #include <linux/osq_lock.h>
  12785. #endif
  12786. +#ifdef CONFIG_PREEMPT_RT_FULL
  12787. +#include <linux/rwsem_rt.h>
  12788. +#else /* PREEMPT_RT_FULL */
  12789. +
  12790. struct rw_semaphore;
  12791. #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
  12792. @@ -177,4 +181,6 @@
  12793. # define up_read_non_owner(sem) up_read(sem)
  12794. #endif
  12795. +#endif /* !PREEMPT_RT_FULL */
  12796. +
  12797. #endif /* _LINUX_RWSEM_H */
  12798. diff -Nur linux-4.4.62.orig/include/linux/rwsem_rt.h linux-4.4.62/include/linux/rwsem_rt.h
  12799. --- linux-4.4.62.orig/include/linux/rwsem_rt.h 1970-01-01 01:00:00.000000000 +0100
  12800. +++ linux-4.4.62/include/linux/rwsem_rt.h 2017-04-18 17:38:08.198649401 +0200
  12801. @@ -0,0 +1,152 @@
  12802. +#ifndef _LINUX_RWSEM_RT_H
  12803. +#define _LINUX_RWSEM_RT_H
  12804. +
  12805. +#ifndef _LINUX_RWSEM_H
  12806. +#error "Include rwsem.h"
  12807. +#endif
  12808. +
  12809. +/*
  12810. + * RW-semaphores are a spinlock plus a reader-depth count.
  12811. + *
  12812. + * Note that the semantics are different from the usual
  12813. + * Linux rw-sems, in PREEMPT_RT mode we do not allow
  12814. + * multiple readers to hold the lock at once, we only allow
  12815. + * a read-lock owner to read-lock recursively. This is
  12816. + * better for latency, makes the implementation inherently
  12817. + * fair and makes it simpler as well.
  12818. + */
  12819. +
  12820. +#include <linux/rtmutex.h>
  12821. +
  12822. +struct rw_semaphore {
  12823. + struct rt_mutex lock;
  12824. + int read_depth;
  12825. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  12826. + struct lockdep_map dep_map;
  12827. +#endif
  12828. +};
  12829. +
  12830. +#define __RWSEM_INITIALIZER(name) \
  12831. + { .lock = __RT_MUTEX_INITIALIZER(name.lock), \
  12832. + RW_DEP_MAP_INIT(name) }
  12833. +
  12834. +#define DECLARE_RWSEM(lockname) \
  12835. + struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname)
  12836. +
  12837. +extern void __rt_rwsem_init(struct rw_semaphore *rwsem, const char *name,
  12838. + struct lock_class_key *key);
  12839. +
  12840. +#define __rt_init_rwsem(sem, name, key) \
  12841. + do { \
  12842. + rt_mutex_init(&(sem)->lock); \
  12843. + __rt_rwsem_init((sem), (name), (key));\
  12844. + } while (0)
  12845. +
  12846. +#define __init_rwsem(sem, name, key) __rt_init_rwsem(sem, name, key)
  12847. +
  12848. +# define rt_init_rwsem(sem) \
  12849. +do { \
  12850. + static struct lock_class_key __key; \
  12851. + \
  12852. + __rt_init_rwsem((sem), #sem, &__key); \
  12853. +} while (0)
  12854. +
  12855. +extern void rt_down_write(struct rw_semaphore *rwsem);
  12856. +extern void rt_down_read_nested(struct rw_semaphore *rwsem, int subclass);
  12857. +extern void rt_down_write_nested(struct rw_semaphore *rwsem, int subclass);
  12858. +extern void rt_down_write_nested_lock(struct rw_semaphore *rwsem,
  12859. + struct lockdep_map *nest);
  12860. +extern void rt__down_read(struct rw_semaphore *rwsem);
  12861. +extern void rt_down_read(struct rw_semaphore *rwsem);
  12862. +extern int rt_down_write_trylock(struct rw_semaphore *rwsem);
  12863. +extern int rt__down_read_trylock(struct rw_semaphore *rwsem);
  12864. +extern int rt_down_read_trylock(struct rw_semaphore *rwsem);
  12865. +extern void __rt_up_read(struct rw_semaphore *rwsem);
  12866. +extern void rt_up_read(struct rw_semaphore *rwsem);
  12867. +extern void rt_up_write(struct rw_semaphore *rwsem);
  12868. +extern void rt_downgrade_write(struct rw_semaphore *rwsem);
  12869. +
  12870. +#define init_rwsem(sem) rt_init_rwsem(sem)
  12871. +#define rwsem_is_locked(s) rt_mutex_is_locked(&(s)->lock)
  12872. +
  12873. +static inline int rwsem_is_contended(struct rw_semaphore *sem)
  12874. +{
  12875. + /* rt_mutex_has_waiters() */
  12876. + return !RB_EMPTY_ROOT(&sem->lock.waiters);
  12877. +}
  12878. +
  12879. +static inline void __down_read(struct rw_semaphore *sem)
  12880. +{
  12881. + rt__down_read(sem);
  12882. +}
  12883. +
  12884. +static inline void down_read(struct rw_semaphore *sem)
  12885. +{
  12886. + rt_down_read(sem);
  12887. +}
  12888. +
  12889. +static inline int __down_read_trylock(struct rw_semaphore *sem)
  12890. +{
  12891. + return rt__down_read_trylock(sem);
  12892. +}
  12893. +
  12894. +static inline int down_read_trylock(struct rw_semaphore *sem)
  12895. +{
  12896. + return rt_down_read_trylock(sem);
  12897. +}
  12898. +
  12899. +static inline void down_write(struct rw_semaphore *sem)
  12900. +{
  12901. + rt_down_write(sem);
  12902. +}
  12903. +
  12904. +static inline int down_write_trylock(struct rw_semaphore *sem)
  12905. +{
  12906. + return rt_down_write_trylock(sem);
  12907. +}
  12908. +
  12909. +static inline void __up_read(struct rw_semaphore *sem)
  12910. +{
  12911. + __rt_up_read(sem);
  12912. +}
  12913. +
  12914. +static inline void up_read(struct rw_semaphore *sem)
  12915. +{
  12916. + rt_up_read(sem);
  12917. +}
  12918. +
  12919. +static inline void up_write(struct rw_semaphore *sem)
  12920. +{
  12921. + rt_up_write(sem);
  12922. +}
  12923. +
  12924. +static inline void downgrade_write(struct rw_semaphore *sem)
  12925. +{
  12926. + rt_downgrade_write(sem);
  12927. +}
  12928. +
  12929. +static inline void down_read_nested(struct rw_semaphore *sem, int subclass)
  12930. +{
  12931. + return rt_down_read_nested(sem, subclass);
  12932. +}
  12933. +
  12934. +static inline void down_write_nested(struct rw_semaphore *sem, int subclass)
  12935. +{
  12936. + rt_down_write_nested(sem, subclass);
  12937. +}
  12938. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  12939. +static inline void down_write_nest_lock(struct rw_semaphore *sem,
  12940. + struct rw_semaphore *nest_lock)
  12941. +{
  12942. + rt_down_write_nested_lock(sem, &nest_lock->dep_map);
  12943. +}
  12944. +
  12945. +#else
  12946. +
  12947. +static inline void down_write_nest_lock(struct rw_semaphore *sem,
  12948. + struct rw_semaphore *nest_lock)
  12949. +{
  12950. + rt_down_write_nested_lock(sem, NULL);
  12951. +}
  12952. +#endif
  12953. +#endif
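The comment at the top of this file is the behavioural contract to remember: an RT rw-semaphore admits a single reader at a time (recursively, for the owning task), so down_read()/up_read() callers compile unchanged but no longer get reader parallelism. A hedged sketch with illustrative names:

static DECLARE_RWSEM(my_cfg_sem);
static int my_cfg_value;

static int my_cfg_read(void)
{
	int v;

	down_read(&my_cfg_sem);		/* rt_down_read(); one reader at a time on RT */
	v = my_cfg_value;
	up_read(&my_cfg_sem);
	return v;
}

static void my_cfg_write(int v)
{
	down_write(&my_cfg_sem);
	my_cfg_value = v;
	up_write(&my_cfg_sem);
}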
  12954. diff -Nur linux-4.4.62.orig/include/linux/sched.h linux-4.4.62/include/linux/sched.h
  12955. --- linux-4.4.62.orig/include/linux/sched.h 2017-04-18 07:15:37.000000000 +0200
  12956. +++ linux-4.4.62/include/linux/sched.h 2017-04-18 17:38:08.198649401 +0200
  12957. @@ -26,6 +26,7 @@
  12958. #include <linux/nodemask.h>
  12959. #include <linux/mm_types.h>
  12960. #include <linux/preempt.h>
  12961. +#include <asm/kmap_types.h>
  12962. #include <asm/page.h>
  12963. #include <asm/ptrace.h>
  12964. @@ -182,8 +183,6 @@
  12965. static inline void update_cpu_load_nohz(void) { }
  12966. #endif
  12967. -extern unsigned long get_parent_ip(unsigned long addr);
  12968. -
  12969. extern void dump_cpu_task(int cpu);
  12970. struct seq_file;
  12971. @@ -242,10 +241,7 @@
  12972. TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \
  12973. __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD)
  12974. -#define task_is_traced(task) ((task->state & __TASK_TRACED) != 0)
  12975. #define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0)
  12976. -#define task_is_stopped_or_traced(task) \
  12977. - ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
  12978. #define task_contributes_to_load(task) \
  12979. ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \
  12980. (task->flags & PF_FROZEN) == 0 && \
  12981. @@ -311,6 +307,11 @@
  12982. #endif
  12983. +#define __set_current_state_no_track(state_value) \
  12984. + do { current->state = (state_value); } while (0)
  12985. +#define set_current_state_no_track(state_value) \
  12986. + set_mb(current->state, (state_value))
  12987. +
  12988. /* Task command name length */
  12989. #define TASK_COMM_LEN 16
  12990. @@ -970,8 +971,18 @@
  12991. struct wake_q_head name = { WAKE_Q_TAIL, &name.first }
  12992. extern void wake_q_add(struct wake_q_head *head,
  12993. - struct task_struct *task);
  12994. -extern void wake_up_q(struct wake_q_head *head);
  12995. + struct task_struct *task);
  12996. +extern void __wake_up_q(struct wake_q_head *head, bool sleeper);
  12997. +
  12998. +static inline void wake_up_q(struct wake_q_head *head)
  12999. +{
  13000. + __wake_up_q(head, false);
  13001. +}
  13002. +
  13003. +static inline void wake_up_q_sleeper(struct wake_q_head *head)
  13004. +{
  13005. + __wake_up_q(head, true);
  13006. +}
  13007. /*
  13008. * sched-domains (multiprocessor balancing) declarations:
  13009. @@ -1379,6 +1390,7 @@
  13010. struct task_struct {
  13011. volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
  13012. + volatile long saved_state; /* saved state for "spinlock sleepers" */
  13013. void *stack;
  13014. atomic_t usage;
  13015. unsigned int flags; /* per process flags, defined below */
  13016. @@ -1415,6 +1427,12 @@
  13017. #endif
  13018. unsigned int policy;
  13019. +#ifdef CONFIG_PREEMPT_RT_FULL
  13020. + int migrate_disable;
  13021. +# ifdef CONFIG_SCHED_DEBUG
  13022. + int migrate_disable_atomic;
  13023. +# endif
  13024. +#endif
  13025. int nr_cpus_allowed;
  13026. cpumask_t cpus_allowed;
  13027. @@ -1522,11 +1540,14 @@
  13028. cputime_t gtime;
  13029. struct prev_cputime prev_cputime;
  13030. #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
  13031. - seqlock_t vtime_seqlock;
  13032. + seqcount_t vtime_seqcount;
  13033. unsigned long long vtime_snap;
  13034. enum {
  13035. - VTIME_SLEEPING = 0,
  13036. + /* Task is sleeping or running in a CPU with VTIME inactive */
  13037. + VTIME_INACTIVE = 0,
  13038. + /* Task runs in userspace in a CPU with VTIME active */
  13039. VTIME_USER,
  13040. + /* Task runs in kernelspace in a CPU with VTIME active */
  13041. VTIME_SYS,
  13042. } vtime_snap_whence;
  13043. #endif
  13044. @@ -1538,6 +1559,9 @@
  13045. struct task_cputime cputime_expires;
  13046. struct list_head cpu_timers[3];
  13047. +#ifdef CONFIG_PREEMPT_RT_BASE
  13048. + struct task_struct *posix_timer_list;
  13049. +#endif
  13050. /* process credentials */
  13051. const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */
  13052. @@ -1569,10 +1593,15 @@
  13053. /* signal handlers */
  13054. struct signal_struct *signal;
  13055. struct sighand_struct *sighand;
  13056. + struct sigqueue *sigqueue_cache;
  13057. sigset_t blocked, real_blocked;
  13058. sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */
  13059. struct sigpending pending;
  13060. +#ifdef CONFIG_PREEMPT_RT_FULL
  13061. + /* TODO: move me into ->restart_block ? */
  13062. + struct siginfo forced_info;
  13063. +#endif
  13064. unsigned long sas_ss_sp;
  13065. size_t sas_ss_size;
  13066. @@ -1796,6 +1825,12 @@
  13067. unsigned long trace;
  13068. /* bitmask and counter of trace recursion */
  13069. unsigned long trace_recursion;
  13070. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  13071. + u64 preempt_timestamp_hist;
  13072. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  13073. + long timer_offset;
  13074. +#endif
  13075. +#endif
  13076. #endif /* CONFIG_TRACING */
  13077. #ifdef CONFIG_MEMCG
  13078. struct mem_cgroup *memcg_in_oom;
  13079. @@ -1812,9 +1847,23 @@
  13080. unsigned int sequential_io;
  13081. unsigned int sequential_io_avg;
  13082. #endif
  13083. +#ifdef CONFIG_PREEMPT_RT_BASE
  13084. + struct rcu_head put_rcu;
  13085. + int softirq_nestcnt;
  13086. + unsigned int softirqs_raised;
  13087. +#endif
  13088. +#ifdef CONFIG_PREEMPT_RT_FULL
  13089. +# if defined CONFIG_HIGHMEM || defined CONFIG_X86_32
  13090. + int kmap_idx;
  13091. + pte_t kmap_pte[KM_TYPE_NR];
  13092. +# endif
  13093. +#endif
  13094. #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
  13095. unsigned long task_state_change;
  13096. #endif
  13097. +#ifdef CONFIG_PREEMPT_RT_FULL
  13098. + int xmit_recursion;
  13099. +#endif
  13100. int pagefault_disabled;
  13101. /* CPU-specific state of this task */
  13102. struct thread_struct thread;
  13103. @@ -1832,9 +1881,6 @@
  13104. # define arch_task_struct_size (sizeof(struct task_struct))
  13105. #endif
  13106. -/* Future-safe accessor for struct task_struct's cpus_allowed. */
  13107. -#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
  13108. -
  13109. #define TNF_MIGRATED 0x01
  13110. #define TNF_NO_GROUP 0x02
  13111. #define TNF_SHARED 0x04
  13112. @@ -2024,6 +2070,15 @@
  13113. extern void free_task(struct task_struct *tsk);
  13114. #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
  13115. +#ifdef CONFIG_PREEMPT_RT_BASE
  13116. +extern void __put_task_struct_cb(struct rcu_head *rhp);
  13117. +
  13118. +static inline void put_task_struct(struct task_struct *t)
  13119. +{
  13120. + if (atomic_dec_and_test(&t->usage))
  13121. + call_rcu(&t->put_rcu, __put_task_struct_cb);
  13122. +}
  13123. +#else
  13124. extern void __put_task_struct(struct task_struct *t);
  13125. static inline void put_task_struct(struct task_struct *t)
  13126. @@ -2031,6 +2086,7 @@
  13127. if (atomic_dec_and_test(&t->usage))
  13128. __put_task_struct(t);
  13129. }
  13130. +#endif
  13131. #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
  13132. extern void task_cputime(struct task_struct *t,
  13133. @@ -2069,6 +2125,7 @@
  13134. /*
  13135. * Per process flags
  13136. */
  13137. +#define PF_IN_SOFTIRQ 0x00000001 /* Task is serving softirq */
  13138. #define PF_EXITING 0x00000004 /* getting shut down */
  13139. #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
  13140. #define PF_VCPU 0x00000010 /* I'm a virtual CPU */
  13141. @@ -2233,6 +2290,10 @@
  13142. extern int set_cpus_allowed_ptr(struct task_struct *p,
  13143. const struct cpumask *new_mask);
  13144. +int migrate_me(void);
  13145. +void tell_sched_cpu_down_begin(int cpu);
  13146. +void tell_sched_cpu_down_done(int cpu);
  13147. +
  13148. #else
  13149. static inline void do_set_cpus_allowed(struct task_struct *p,
  13150. const struct cpumask *new_mask)
  13151. @@ -2245,6 +2306,9 @@
  13152. return -EINVAL;
  13153. return 0;
  13154. }
  13155. +static inline int migrate_me(void) { return 0; }
  13156. +static inline void tell_sched_cpu_down_begin(int cpu) { }
  13157. +static inline void tell_sched_cpu_down_done(int cpu) { }
  13158. #endif
  13159. #ifdef CONFIG_NO_HZ_COMMON
  13160. @@ -2454,6 +2518,7 @@
  13161. extern int wake_up_state(struct task_struct *tsk, unsigned int state);
  13162. extern int wake_up_process(struct task_struct *tsk);
  13163. +extern int wake_up_lock_sleeper(struct task_struct * tsk);
  13164. extern void wake_up_new_task(struct task_struct *tsk);
  13165. #ifdef CONFIG_SMP
  13166. extern void kick_process(struct task_struct *tsk);
  13167. @@ -2577,12 +2642,24 @@
  13168. /* mmdrop drops the mm and the page tables */
  13169. extern void __mmdrop(struct mm_struct *);
  13170. +
  13171. static inline void mmdrop(struct mm_struct * mm)
  13172. {
  13173. if (unlikely(atomic_dec_and_test(&mm->mm_count)))
  13174. __mmdrop(mm);
  13175. }
  13176. +#ifdef CONFIG_PREEMPT_RT_BASE
  13177. +extern void __mmdrop_delayed(struct rcu_head *rhp);
  13178. +static inline void mmdrop_delayed(struct mm_struct *mm)
  13179. +{
  13180. + if (atomic_dec_and_test(&mm->mm_count))
  13181. + call_rcu(&mm->delayed_drop, __mmdrop_delayed);
  13182. +}
  13183. +#else
  13184. +# define mmdrop_delayed(mm) mmdrop(mm)
  13185. +#endif
  13186. +
  13187. /* mmput gets rid of the mappings and all user-space */
  13188. extern void mmput(struct mm_struct *);
  13189. /* Grab a reference to a task's mm, if it is not already going away */
  13190. @@ -2892,6 +2969,43 @@
  13191. return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
  13192. }
  13193. +#ifdef CONFIG_PREEMPT_LAZY
  13194. +static inline void set_tsk_need_resched_lazy(struct task_struct *tsk)
  13195. +{
  13196. + set_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY);
  13197. +}
  13198. +
  13199. +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk)
  13200. +{
  13201. + clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY);
  13202. +}
  13203. +
  13204. +static inline int test_tsk_need_resched_lazy(struct task_struct *tsk)
  13205. +{
  13206. + return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY));
  13207. +}
  13208. +
  13209. +static inline int need_resched_lazy(void)
  13210. +{
  13211. + return test_thread_flag(TIF_NEED_RESCHED_LAZY);
  13212. +}
  13213. +
  13214. +static inline int need_resched_now(void)
  13215. +{
  13216. + return test_thread_flag(TIF_NEED_RESCHED);
  13217. +}
  13218. +
  13219. +#else
  13220. +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) { }
  13221. +static inline int need_resched_lazy(void) { return 0; }
  13222. +
  13223. +static inline int need_resched_now(void)
  13224. +{
  13225. + return test_thread_flag(TIF_NEED_RESCHED);
  13226. +}
  13227. +
  13228. +#endif
  13229. +
  13230. static inline int restart_syscall(void)
  13231. {
  13232. set_tsk_thread_flag(current, TIF_SIGPENDING);
  13233. @@ -2923,6 +3037,51 @@
  13234. return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
  13235. }
  13236. +static inline bool __task_is_stopped_or_traced(struct task_struct *task)
  13237. +{
  13238. + if (task->state & (__TASK_STOPPED | __TASK_TRACED))
  13239. + return true;
  13240. +#ifdef CONFIG_PREEMPT_RT_FULL
  13241. + if (task->saved_state & (__TASK_STOPPED | __TASK_TRACED))
  13242. + return true;
  13243. +#endif
  13244. + return false;
  13245. +}
  13246. +
  13247. +static inline bool task_is_stopped_or_traced(struct task_struct *task)
  13248. +{
  13249. + bool traced_stopped;
  13250. +
  13251. +#ifdef CONFIG_PREEMPT_RT_FULL
  13252. + unsigned long flags;
  13253. +
  13254. + raw_spin_lock_irqsave(&task->pi_lock, flags);
  13255. + traced_stopped = __task_is_stopped_or_traced(task);
  13256. + raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  13257. +#else
  13258. + traced_stopped = __task_is_stopped_or_traced(task);
  13259. +#endif
  13260. + return traced_stopped;
  13261. +}
  13262. +
  13263. +static inline bool task_is_traced(struct task_struct *task)
  13264. +{
  13265. + bool traced = false;
  13266. +
  13267. + if (task->state & __TASK_TRACED)
  13268. + return true;
  13269. +#ifdef CONFIG_PREEMPT_RT_FULL
  13270. + /* in case the task is sleeping on tasklist_lock */
  13271. + raw_spin_lock_irq(&task->pi_lock);
  13272. + if (task->state & __TASK_TRACED)
  13273. + traced = true;
  13274. + else if (task->saved_state & __TASK_TRACED)
  13275. + traced = true;
  13276. + raw_spin_unlock_irq(&task->pi_lock);
  13277. +#endif
  13278. + return traced;
  13279. +}
  13280. +
  13281. /*
  13282. * cond_resched() and cond_resched_lock(): latency reduction via
  13283. * explicit rescheduling in places that are safe. The return
  13284. @@ -2944,12 +3103,16 @@
  13285. __cond_resched_lock(lock); \
  13286. })
  13287. +#ifndef CONFIG_PREEMPT_RT_FULL
  13288. extern int __cond_resched_softirq(void);
  13289. #define cond_resched_softirq() ({ \
  13290. ___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \
  13291. __cond_resched_softirq(); \
  13292. })
  13293. +#else
  13294. +# define cond_resched_softirq() cond_resched()
  13295. +#endif
  13296. static inline void cond_resched_rcu(void)
  13297. {
  13298. @@ -3111,6 +3274,31 @@
  13299. #endif /* CONFIG_SMP */
  13300. +static inline int __migrate_disabled(struct task_struct *p)
  13301. +{
  13302. +#ifdef CONFIG_PREEMPT_RT_FULL
  13303. + return p->migrate_disable;
  13304. +#else
  13305. + return 0;
  13306. +#endif
  13307. +}
  13308. +
  13309. +/* Future-safe accessor for struct task_struct's cpus_allowed. */
  13310. +static inline const struct cpumask *tsk_cpus_allowed(struct task_struct *p)
  13311. +{
  13312. + if (__migrate_disabled(p))
  13313. + return cpumask_of(task_cpu(p));
  13314. +
  13315. + return &p->cpus_allowed;
  13316. +}
  13317. +
  13318. +static inline int tsk_nr_cpus_allowed(struct task_struct *p)
  13319. +{
  13320. + if (__migrate_disabled(p))
  13321. + return 1;
  13322. + return p->nr_cpus_allowed;
  13323. +}
  13324. +
  13325. extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
  13326. extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
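A usage sketch (illustrative only, not part of the patch) of the tsk_cpus_allowed()/tsk_nr_cpus_allowed() accessors added above: a scheduler-side helper that treats a migrate-disabled task as pinned to its current CPU. The function name pick_cpu_for and its caller are hypothetical.

/*
 * Hypothetical helper, sketched for illustration: a migrate-disabled task
 * reports exactly one allowed CPU via the accessors introduced above.
 */
#include <linux/sched.h>
#include <linux/cpumask.h>

static int pick_cpu_for(struct task_struct *p)
{
        /* migrate-disabled: the only legal CPU is the one it runs on */
        if (tsk_nr_cpus_allowed(p) == 1)
                return cpumask_first(tsk_cpus_allowed(p));

        /* otherwise any CPU in the task's affinity mask is a candidate */
        return cpumask_any(tsk_cpus_allowed(p));
}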
  13327. diff -Nur linux-4.4.62.orig/include/linux/seqlock.h linux-4.4.62/include/linux/seqlock.h
  13328. --- linux-4.4.62.orig/include/linux/seqlock.h 2017-04-18 07:15:37.000000000 +0200
  13329. +++ linux-4.4.62/include/linux/seqlock.h 2017-04-18 17:38:08.198649401 +0200
  13330. @@ -220,20 +220,30 @@
  13331. return __read_seqcount_retry(s, start);
  13332. }
  13333. -
  13334. -
  13335. -static inline void raw_write_seqcount_begin(seqcount_t *s)
  13336. +static inline void __raw_write_seqcount_begin(seqcount_t *s)
  13337. {
  13338. s->sequence++;
  13339. smp_wmb();
  13340. }
  13341. -static inline void raw_write_seqcount_end(seqcount_t *s)
  13342. +static inline void raw_write_seqcount_begin(seqcount_t *s)
  13343. +{
  13344. + preempt_disable_rt();
  13345. + __raw_write_seqcount_begin(s);
  13346. +}
  13347. +
  13348. +static inline void __raw_write_seqcount_end(seqcount_t *s)
  13349. {
  13350. smp_wmb();
  13351. s->sequence++;
  13352. }
  13353. +static inline void raw_write_seqcount_end(seqcount_t *s)
  13354. +{
  13355. + __raw_write_seqcount_end(s);
  13356. + preempt_enable_rt();
  13357. +}
  13358. +
  13359. /**
  13360. * raw_write_seqcount_barrier - do a seq write barrier
  13361. * @s: pointer to seqcount_t
  13362. @@ -425,10 +435,32 @@
  13363. /*
  13364. * Read side functions for starting and finalizing a read side section.
  13365. */
  13366. +#ifndef CONFIG_PREEMPT_RT_FULL
  13367. static inline unsigned read_seqbegin(const seqlock_t *sl)
  13368. {
  13369. return read_seqcount_begin(&sl->seqcount);
  13370. }
  13371. +#else
  13372. +/*
  13373. + * Starvation safe read side for RT
  13374. + */
  13375. +static inline unsigned read_seqbegin(seqlock_t *sl)
  13376. +{
  13377. + unsigned ret;
  13378. +
  13379. +repeat:
  13380. + ret = ACCESS_ONCE(sl->seqcount.sequence);
  13381. + if (unlikely(ret & 1)) {
  13382. + /*
  13383. + * Take the lock and let the writer proceed (i.e. evtl
  13384. + * boost it), otherwise we could loop here forever.
  13385. + */
  13386. + spin_unlock_wait(&sl->lock);
  13387. + goto repeat;
  13388. + }
  13389. + return ret;
  13390. +}
  13391. +#endif
  13392. static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
  13393. {
  13394. @@ -443,36 +475,36 @@
  13395. static inline void write_seqlock(seqlock_t *sl)
  13396. {
  13397. spin_lock(&sl->lock);
  13398. - write_seqcount_begin(&sl->seqcount);
  13399. + __raw_write_seqcount_begin(&sl->seqcount);
  13400. }
  13401. static inline void write_sequnlock(seqlock_t *sl)
  13402. {
  13403. - write_seqcount_end(&sl->seqcount);
  13404. + __raw_write_seqcount_end(&sl->seqcount);
  13405. spin_unlock(&sl->lock);
  13406. }
  13407. static inline void write_seqlock_bh(seqlock_t *sl)
  13408. {
  13409. spin_lock_bh(&sl->lock);
  13410. - write_seqcount_begin(&sl->seqcount);
  13411. + __raw_write_seqcount_begin(&sl->seqcount);
  13412. }
  13413. static inline void write_sequnlock_bh(seqlock_t *sl)
  13414. {
  13415. - write_seqcount_end(&sl->seqcount);
  13416. + __raw_write_seqcount_end(&sl->seqcount);
  13417. spin_unlock_bh(&sl->lock);
  13418. }
  13419. static inline void write_seqlock_irq(seqlock_t *sl)
  13420. {
  13421. spin_lock_irq(&sl->lock);
  13422. - write_seqcount_begin(&sl->seqcount);
  13423. + __raw_write_seqcount_begin(&sl->seqcount);
  13424. }
  13425. static inline void write_sequnlock_irq(seqlock_t *sl)
  13426. {
  13427. - write_seqcount_end(&sl->seqcount);
  13428. + __raw_write_seqcount_end(&sl->seqcount);
  13429. spin_unlock_irq(&sl->lock);
  13430. }
  13431. @@ -481,7 +513,7 @@
  13432. unsigned long flags;
  13433. spin_lock_irqsave(&sl->lock, flags);
  13434. - write_seqcount_begin(&sl->seqcount);
  13435. + __raw_write_seqcount_begin(&sl->seqcount);
  13436. return flags;
  13437. }
  13438. @@ -491,7 +523,7 @@
  13439. static inline void
  13440. write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
  13441. {
  13442. - write_seqcount_end(&sl->seqcount);
  13443. + __raw_write_seqcount_end(&sl->seqcount);
  13444. spin_unlock_irqrestore(&sl->lock, flags);
  13445. }
  13446. diff -Nur linux-4.4.62.orig/include/linux/signal.h linux-4.4.62/include/linux/signal.h
  13447. --- linux-4.4.62.orig/include/linux/signal.h 2017-04-18 07:15:37.000000000 +0200
  13448. +++ linux-4.4.62/include/linux/signal.h 2017-04-18 17:38:08.198649401 +0200
  13449. @@ -233,6 +233,7 @@
  13450. }
  13451. extern void flush_sigqueue(struct sigpending *queue);
  13452. +extern void flush_task_sigqueue(struct task_struct *tsk);
  13453. /* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */
  13454. static inline int valid_signal(unsigned long sig)
  13455. diff -Nur linux-4.4.62.orig/include/linux/skbuff.h linux-4.4.62/include/linux/skbuff.h
  13456. --- linux-4.4.62.orig/include/linux/skbuff.h 2017-04-18 07:15:37.000000000 +0200
  13457. +++ linux-4.4.62/include/linux/skbuff.h 2017-04-18 17:38:08.198649401 +0200
  13458. @@ -203,6 +203,7 @@
  13459. __u32 qlen;
  13460. spinlock_t lock;
  13461. + raw_spinlock_t raw_lock;
  13462. };
  13463. struct sk_buff;
  13464. @@ -1465,6 +1466,12 @@
  13465. __skb_queue_head_init(list);
  13466. }
  13467. +static inline void skb_queue_head_init_raw(struct sk_buff_head *list)
  13468. +{
  13469. + raw_spin_lock_init(&list->raw_lock);
  13470. + __skb_queue_head_init(list);
  13471. +}
  13472. +
  13473. static inline void skb_queue_head_init_class(struct sk_buff_head *list,
  13474. struct lock_class_key *class)
  13475. {
  13476. diff -Nur linux-4.4.62.orig/include/linux/smp.h linux-4.4.62/include/linux/smp.h
  13477. --- linux-4.4.62.orig/include/linux/smp.h 2017-04-18 07:15:37.000000000 +0200
  13478. +++ linux-4.4.62/include/linux/smp.h 2017-04-18 17:38:08.198649401 +0200
  13479. @@ -185,6 +185,9 @@
  13480. #define get_cpu() ({ preempt_disable(); smp_processor_id(); })
  13481. #define put_cpu() preempt_enable()
  13482. +#define get_cpu_light() ({ migrate_disable(); smp_processor_id(); })
  13483. +#define put_cpu_light() migrate_enable()
  13484. +
  13485. /*
  13486. * Callback to arch code if there's nosmp or maxcpus=0 on the
  13487. * boot command line:
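A minimal sketch (not part of the patch) of the get_cpu_light()/put_cpu_light() pair added above: unlike get_cpu(), it only disables migration, so the section stays preemptible and may take a sleeping spinlock, which is exactly what RT conversions need. The per-CPU structure my_pcpu is hypothetical and its lock is assumed to be initialized with spin_lock_init() elsewhere.

#include <linux/smp.h>
#include <linux/percpu.h>
#include <linux/spinlock.h>

struct my_pcpu {
        spinlock_t lock;                /* sleeping lock on RT */
        unsigned long count;
};
static DEFINE_PER_CPU(struct my_pcpu, my_pcpu);

static void bump_local_counter(void)
{
        struct my_pcpu *p;

        p = &per_cpu(my_pcpu, get_cpu_light());  /* migrate_disable() + smp_processor_id() */
        spin_lock(&p->lock);            /* allowed: we are still preemptible */
        p->count++;
        spin_unlock(&p->lock);
        put_cpu_light();                /* migrate_enable() */
}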
  13488. diff -Nur linux-4.4.62.orig/include/linux/spinlock_api_smp.h linux-4.4.62/include/linux/spinlock_api_smp.h
  13489. --- linux-4.4.62.orig/include/linux/spinlock_api_smp.h 2017-04-18 07:15:37.000000000 +0200
  13490. +++ linux-4.4.62/include/linux/spinlock_api_smp.h 2017-04-18 17:38:08.198649401 +0200
  13491. @@ -189,6 +189,8 @@
  13492. return 0;
  13493. }
  13494. -#include <linux/rwlock_api_smp.h>
  13495. +#ifndef CONFIG_PREEMPT_RT_FULL
  13496. +# include <linux/rwlock_api_smp.h>
  13497. +#endif
  13498. #endif /* __LINUX_SPINLOCK_API_SMP_H */
  13499. diff -Nur linux-4.4.62.orig/include/linux/spinlock.h linux-4.4.62/include/linux/spinlock.h
  13500. --- linux-4.4.62.orig/include/linux/spinlock.h 2017-04-18 07:15:37.000000000 +0200
  13501. +++ linux-4.4.62/include/linux/spinlock.h 2017-04-18 17:38:08.198649401 +0200
  13502. @@ -271,7 +271,11 @@
  13503. #define raw_spin_can_lock(lock) (!raw_spin_is_locked(lock))
  13504. /* Include rwlock functions */
  13505. -#include <linux/rwlock.h>
  13506. +#ifdef CONFIG_PREEMPT_RT_FULL
  13507. +# include <linux/rwlock_rt.h>
  13508. +#else
  13509. +# include <linux/rwlock.h>
  13510. +#endif
  13511. /*
  13512. * Pull the _spin_*()/_read_*()/_write_*() functions/declarations:
  13513. @@ -282,6 +286,10 @@
  13514. # include <linux/spinlock_api_up.h>
  13515. #endif
  13516. +#ifdef CONFIG_PREEMPT_RT_FULL
  13517. +# include <linux/spinlock_rt.h>
  13518. +#else /* PREEMPT_RT_FULL */
  13519. +
  13520. /*
  13521. * Map the spin_lock functions to the raw variants for PREEMPT_RT=n
  13522. */
  13523. @@ -347,6 +355,12 @@
  13524. raw_spin_unlock(&lock->rlock);
  13525. }
  13526. +static __always_inline int spin_unlock_no_deboost(spinlock_t *lock)
  13527. +{
  13528. + raw_spin_unlock(&lock->rlock);
  13529. + return 0;
  13530. +}
  13531. +
  13532. static __always_inline void spin_unlock_bh(spinlock_t *lock)
  13533. {
  13534. raw_spin_unlock_bh(&lock->rlock);
  13535. @@ -416,4 +430,6 @@
  13536. #define atomic_dec_and_lock(atomic, lock) \
  13537. __cond_lock(lock, _atomic_dec_and_lock(atomic, lock))
  13538. +#endif /* !PREEMPT_RT_FULL */
  13539. +
  13540. #endif /* __LINUX_SPINLOCK_H */
  13541. diff -Nur linux-4.4.62.orig/include/linux/spinlock_rt.h linux-4.4.62/include/linux/spinlock_rt.h
  13542. --- linux-4.4.62.orig/include/linux/spinlock_rt.h 1970-01-01 01:00:00.000000000 +0100
  13543. +++ linux-4.4.62/include/linux/spinlock_rt.h 2017-04-18 17:38:08.198649401 +0200
  13544. @@ -0,0 +1,165 @@
  13545. +#ifndef __LINUX_SPINLOCK_RT_H
  13546. +#define __LINUX_SPINLOCK_RT_H
  13547. +
  13548. +#ifndef __LINUX_SPINLOCK_H
  13549. +#error Do not include directly. Use spinlock.h
  13550. +#endif
  13551. +
  13552. +#include <linux/bug.h>
  13553. +
  13554. +extern void
  13555. +__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key);
  13556. +
  13557. +#define spin_lock_init(slock) \
  13558. +do { \
  13559. + static struct lock_class_key __key; \
  13560. + \
  13561. + rt_mutex_init(&(slock)->lock); \
  13562. + __rt_spin_lock_init(slock, #slock, &__key); \
  13563. +} while (0)
  13564. +
  13565. +void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock);
  13566. +void __lockfunc rt_spin_unlock__no_mg(spinlock_t *lock);
  13567. +int __lockfunc rt_spin_trylock__no_mg(spinlock_t *lock);
  13568. +
  13569. +extern void __lockfunc rt_spin_lock(spinlock_t *lock);
  13570. +extern unsigned long __lockfunc rt_spin_lock_trace_flags(spinlock_t *lock);
  13571. +extern void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass);
  13572. +extern void __lockfunc rt_spin_unlock(spinlock_t *lock);
  13573. +extern int __lockfunc rt_spin_unlock_no_deboost(spinlock_t *lock);
  13574. +extern void __lockfunc rt_spin_unlock_wait(spinlock_t *lock);
  13575. +extern int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags);
  13576. +extern int __lockfunc rt_spin_trylock_bh(spinlock_t *lock);
  13577. +extern int __lockfunc rt_spin_trylock(spinlock_t *lock);
  13578. +extern int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock);
  13579. +
  13580. +/*
  13581. + * lockdep-less calls, for derived types like rwlock:
  13582. + * (for trylock they can use rt_mutex_trylock() directly.
  13583. + */
  13584. +extern void __lockfunc __rt_spin_lock__no_mg(struct rt_mutex *lock);
  13585. +extern void __lockfunc __rt_spin_lock(struct rt_mutex *lock);
  13586. +extern void __lockfunc __rt_spin_unlock(struct rt_mutex *lock);
  13587. +extern int __lockfunc __rt_spin_trylock(struct rt_mutex *lock);
  13588. +
  13589. +#define spin_lock(lock) rt_spin_lock(lock)
  13590. +
  13591. +#define spin_lock_bh(lock) \
  13592. + do { \
  13593. + local_bh_disable(); \
  13594. + rt_spin_lock(lock); \
  13595. + } while (0)
  13596. +
  13597. +#define spin_lock_irq(lock) spin_lock(lock)
  13598. +
  13599. +#define spin_do_trylock(lock) __cond_lock(lock, rt_spin_trylock(lock))
  13600. +
  13601. +#define spin_trylock(lock) \
  13602. +({ \
  13603. + int __locked; \
  13604. + __locked = spin_do_trylock(lock); \
  13605. + __locked; \
  13606. +})
  13607. +
  13608. +#ifdef CONFIG_LOCKDEP
  13609. +# define spin_lock_nested(lock, subclass) \
  13610. + do { \
  13611. + rt_spin_lock_nested(lock, subclass); \
  13612. + } while (0)
  13613. +
  13614. +#define spin_lock_bh_nested(lock, subclass) \
  13615. + do { \
  13616. + local_bh_disable(); \
  13617. + rt_spin_lock_nested(lock, subclass); \
  13618. + } while (0)
  13619. +
  13620. +# define spin_lock_irqsave_nested(lock, flags, subclass) \
  13621. + do { \
  13622. + typecheck(unsigned long, flags); \
  13623. + flags = 0; \
  13624. + rt_spin_lock_nested(lock, subclass); \
  13625. + } while (0)
  13626. +#else
  13627. +# define spin_lock_nested(lock, subclass) spin_lock(lock)
  13628. +# define spin_lock_bh_nested(lock, subclass) spin_lock_bh(lock)
  13629. +
  13630. +# define spin_lock_irqsave_nested(lock, flags, subclass) \
  13631. + do { \
  13632. + typecheck(unsigned long, flags); \
  13633. + flags = 0; \
  13634. + spin_lock(lock); \
  13635. + } while (0)
  13636. +#endif
  13637. +
  13638. +#define spin_lock_irqsave(lock, flags) \
  13639. + do { \
  13640. + typecheck(unsigned long, flags); \
  13641. + flags = 0; \
  13642. + spin_lock(lock); \
  13643. + } while (0)
  13644. +
  13645. +static inline unsigned long spin_lock_trace_flags(spinlock_t *lock)
  13646. +{
  13647. + unsigned long flags = 0;
  13648. +#ifdef CONFIG_TRACE_IRQFLAGS
  13649. + flags = rt_spin_lock_trace_flags(lock);
  13650. +#else
  13651. + spin_lock(lock); /* lock_local */
  13652. +#endif
  13653. + return flags;
  13654. +}
  13655. +
  13656. +/* FIXME: we need rt_spin_lock_nest_lock */
  13657. +#define spin_lock_nest_lock(lock, nest_lock) spin_lock_nested(lock, 0)
  13658. +
  13659. +#define spin_unlock(lock) rt_spin_unlock(lock)
  13660. +#define spin_unlock_no_deboost(lock) rt_spin_unlock_no_deboost(lock)
  13661. +
  13662. +#define spin_unlock_bh(lock) \
  13663. + do { \
  13664. + rt_spin_unlock(lock); \
  13665. + local_bh_enable(); \
  13666. + } while (0)
  13667. +
  13668. +#define spin_unlock_irq(lock) spin_unlock(lock)
  13669. +
  13670. +#define spin_unlock_irqrestore(lock, flags) \
  13671. + do { \
  13672. + typecheck(unsigned long, flags); \
  13673. + (void) flags; \
  13674. + spin_unlock(lock); \
  13675. + } while (0)
  13676. +
  13677. +#define spin_trylock_bh(lock) __cond_lock(lock, rt_spin_trylock_bh(lock))
  13678. +#define spin_trylock_irq(lock) spin_trylock(lock)
  13679. +
  13680. +#define spin_trylock_irqsave(lock, flags) \
  13681. + rt_spin_trylock_irqsave(lock, &(flags))
  13682. +
  13683. +#define spin_unlock_wait(lock) rt_spin_unlock_wait(lock)
  13684. +
  13685. +#ifdef CONFIG_GENERIC_LOCKBREAK
  13686. +# define spin_is_contended(lock) ((lock)->break_lock)
  13687. +#else
  13688. +# define spin_is_contended(lock) (((void)(lock), 0))
  13689. +#endif
  13690. +
  13691. +static inline int spin_can_lock(spinlock_t *lock)
  13692. +{
  13693. + return !rt_mutex_is_locked(&lock->lock);
  13694. +}
  13695. +
  13696. +static inline int spin_is_locked(spinlock_t *lock)
  13697. +{
  13698. + return rt_mutex_is_locked(&lock->lock);
  13699. +}
  13700. +
  13701. +static inline void assert_spin_locked(spinlock_t *lock)
  13702. +{
  13703. + BUG_ON(!spin_is_locked(lock));
  13704. +}
  13705. +
  13706. +#define atomic_dec_and_lock(atomic, lock) \
  13707. + atomic_dec_and_spin_lock(atomic, lock)
  13708. +
  13709. +#endif
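A sketch (illustrative, not part of the patch) of what the RT mapping above means for existing callers: spin_lock_irqsave() no longer disables interrupts or preemption; "flags" is simply set to 0 and ignored on unlock, and the lock itself is an rtmutex-backed sleeping lock. Code like the following keeps compiling and working unchanged, it just becomes preemptible under PREEMPT_RT_FULL. The names sample_lock and sample_count are hypothetical.

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(sample_lock);
static unsigned long sample_count;

static void sample_inc(void)
{
        unsigned long flags;

        spin_lock_irqsave(&sample_lock, flags);         /* rt_spin_lock() on RT, flags = 0 */
        sample_count++;
        spin_unlock_irqrestore(&sample_lock, flags);    /* rt_spin_unlock() on RT, flags unused */
}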
  13710. diff -Nur linux-4.4.62.orig/include/linux/spinlock_types.h linux-4.4.62/include/linux/spinlock_types.h
  13711. --- linux-4.4.62.orig/include/linux/spinlock_types.h 2017-04-18 07:15:37.000000000 +0200
  13712. +++ linux-4.4.62/include/linux/spinlock_types.h 2017-04-18 17:38:08.198649401 +0200
  13713. @@ -9,80 +9,15 @@
  13714. * Released under the General Public License (GPL).
  13715. */
  13716. -#if defined(CONFIG_SMP)
  13717. -# include <asm/spinlock_types.h>
  13718. -#else
  13719. -# include <linux/spinlock_types_up.h>
  13720. -#endif
  13721. -
  13722. -#include <linux/lockdep.h>
  13723. -
  13724. -typedef struct raw_spinlock {
  13725. - arch_spinlock_t raw_lock;
  13726. -#ifdef CONFIG_GENERIC_LOCKBREAK
  13727. - unsigned int break_lock;
  13728. -#endif
  13729. -#ifdef CONFIG_DEBUG_SPINLOCK
  13730. - unsigned int magic, owner_cpu;
  13731. - void *owner;
  13732. -#endif
  13733. -#ifdef CONFIG_DEBUG_LOCK_ALLOC
  13734. - struct lockdep_map dep_map;
  13735. -#endif
  13736. -} raw_spinlock_t;
  13737. -
  13738. -#define SPINLOCK_MAGIC 0xdead4ead
  13739. -
  13740. -#define SPINLOCK_OWNER_INIT ((void *)-1L)
  13741. -
  13742. -#ifdef CONFIG_DEBUG_LOCK_ALLOC
  13743. -# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
  13744. -#else
  13745. -# define SPIN_DEP_MAP_INIT(lockname)
  13746. -#endif
  13747. +#include <linux/spinlock_types_raw.h>
  13748. -#ifdef CONFIG_DEBUG_SPINLOCK
  13749. -# define SPIN_DEBUG_INIT(lockname) \
  13750. - .magic = SPINLOCK_MAGIC, \
  13751. - .owner_cpu = -1, \
  13752. - .owner = SPINLOCK_OWNER_INIT,
  13753. +#ifndef CONFIG_PREEMPT_RT_FULL
  13754. +# include <linux/spinlock_types_nort.h>
  13755. +# include <linux/rwlock_types.h>
  13756. #else
  13757. -# define SPIN_DEBUG_INIT(lockname)
  13758. +# include <linux/rtmutex.h>
  13759. +# include <linux/spinlock_types_rt.h>
  13760. +# include <linux/rwlock_types_rt.h>
  13761. #endif
  13762. -#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \
  13763. - { \
  13764. - .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
  13765. - SPIN_DEBUG_INIT(lockname) \
  13766. - SPIN_DEP_MAP_INIT(lockname) }
  13767. -
  13768. -#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \
  13769. - (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname)
  13770. -
  13771. -#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x)
  13772. -
  13773. -typedef struct spinlock {
  13774. - union {
  13775. - struct raw_spinlock rlock;
  13776. -
  13777. -#ifdef CONFIG_DEBUG_LOCK_ALLOC
  13778. -# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map))
  13779. - struct {
  13780. - u8 __padding[LOCK_PADSIZE];
  13781. - struct lockdep_map dep_map;
  13782. - };
  13783. -#endif
  13784. - };
  13785. -} spinlock_t;
  13786. -
  13787. -#define __SPIN_LOCK_INITIALIZER(lockname) \
  13788. - { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } }
  13789. -
  13790. -#define __SPIN_LOCK_UNLOCKED(lockname) \
  13791. - (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname)
  13792. -
  13793. -#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
  13794. -
  13795. -#include <linux/rwlock_types.h>
  13796. -
  13797. #endif /* __LINUX_SPINLOCK_TYPES_H */
  13798. diff -Nur linux-4.4.62.orig/include/linux/spinlock_types_nort.h linux-4.4.62/include/linux/spinlock_types_nort.h
  13799. --- linux-4.4.62.orig/include/linux/spinlock_types_nort.h 1970-01-01 01:00:00.000000000 +0100
  13800. +++ linux-4.4.62/include/linux/spinlock_types_nort.h 2017-04-18 17:38:08.198649401 +0200
  13801. @@ -0,0 +1,33 @@
  13802. +#ifndef __LINUX_SPINLOCK_TYPES_NORT_H
  13803. +#define __LINUX_SPINLOCK_TYPES_NORT_H
  13804. +
  13805. +#ifndef __LINUX_SPINLOCK_TYPES_H
  13806. +#error "Do not include directly. Include spinlock_types.h instead"
  13807. +#endif
  13808. +
  13809. +/*
  13810. + * The non RT version maps spinlocks to raw_spinlocks
  13811. + */
  13812. +typedef struct spinlock {
  13813. + union {
  13814. + struct raw_spinlock rlock;
  13815. +
  13816. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  13817. +# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map))
  13818. + struct {
  13819. + u8 __padding[LOCK_PADSIZE];
  13820. + struct lockdep_map dep_map;
  13821. + };
  13822. +#endif
  13823. + };
  13824. +} spinlock_t;
  13825. +
  13826. +#define __SPIN_LOCK_INITIALIZER(lockname) \
  13827. + { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } }
  13828. +
  13829. +#define __SPIN_LOCK_UNLOCKED(lockname) \
  13830. + (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname)
  13831. +
  13832. +#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
  13833. +
  13834. +#endif
  13835. diff -Nur linux-4.4.62.orig/include/linux/spinlock_types_raw.h linux-4.4.62/include/linux/spinlock_types_raw.h
  13836. --- linux-4.4.62.orig/include/linux/spinlock_types_raw.h 1970-01-01 01:00:00.000000000 +0100
  13837. +++ linux-4.4.62/include/linux/spinlock_types_raw.h 2017-04-18 17:38:08.198649401 +0200
  13838. @@ -0,0 +1,56 @@
  13839. +#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
  13840. +#define __LINUX_SPINLOCK_TYPES_RAW_H
  13841. +
  13842. +#if defined(CONFIG_SMP)
  13843. +# include <asm/spinlock_types.h>
  13844. +#else
  13845. +# include <linux/spinlock_types_up.h>
  13846. +#endif
  13847. +
  13848. +#include <linux/lockdep.h>
  13849. +
  13850. +typedef struct raw_spinlock {
  13851. + arch_spinlock_t raw_lock;
  13852. +#ifdef CONFIG_GENERIC_LOCKBREAK
  13853. + unsigned int break_lock;
  13854. +#endif
  13855. +#ifdef CONFIG_DEBUG_SPINLOCK
  13856. + unsigned int magic, owner_cpu;
  13857. + void *owner;
  13858. +#endif
  13859. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  13860. + struct lockdep_map dep_map;
  13861. +#endif
  13862. +} raw_spinlock_t;
  13863. +
  13864. +#define SPINLOCK_MAGIC 0xdead4ead
  13865. +
  13866. +#define SPINLOCK_OWNER_INIT ((void *)-1L)
  13867. +
  13868. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  13869. +# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
  13870. +#else
  13871. +# define SPIN_DEP_MAP_INIT(lockname)
  13872. +#endif
  13873. +
  13874. +#ifdef CONFIG_DEBUG_SPINLOCK
  13875. +# define SPIN_DEBUG_INIT(lockname) \
  13876. + .magic = SPINLOCK_MAGIC, \
  13877. + .owner_cpu = -1, \
  13878. + .owner = SPINLOCK_OWNER_INIT,
  13879. +#else
  13880. +# define SPIN_DEBUG_INIT(lockname)
  13881. +#endif
  13882. +
  13883. +#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \
  13884. + { \
  13885. + .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
  13886. + SPIN_DEBUG_INIT(lockname) \
  13887. + SPIN_DEP_MAP_INIT(lockname) }
  13888. +
  13889. +#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \
  13890. + (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname)
  13891. +
  13892. +#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x)
  13893. +
  13894. +#endif
  13895. diff -Nur linux-4.4.62.orig/include/linux/spinlock_types_rt.h linux-4.4.62/include/linux/spinlock_types_rt.h
  13896. --- linux-4.4.62.orig/include/linux/spinlock_types_rt.h 1970-01-01 01:00:00.000000000 +0100
  13897. +++ linux-4.4.62/include/linux/spinlock_types_rt.h 2017-04-18 17:38:08.198649401 +0200
  13898. @@ -0,0 +1,51 @@
  13899. +#ifndef __LINUX_SPINLOCK_TYPES_RT_H
  13900. +#define __LINUX_SPINLOCK_TYPES_RT_H
  13901. +
  13902. +#ifndef __LINUX_SPINLOCK_TYPES_H
  13903. +#error "Do not include directly. Include spinlock_types.h instead"
  13904. +#endif
  13905. +
  13906. +#include <linux/cache.h>
  13907. +
  13908. +/*
  13909. + * PREEMPT_RT: spinlocks - an RT mutex plus lock-break field:
  13910. + */
  13911. +typedef struct spinlock {
  13912. + struct rt_mutex lock;
  13913. + unsigned int break_lock;
  13914. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  13915. + struct lockdep_map dep_map;
  13916. +#endif
  13917. +} spinlock_t;
  13918. +
  13919. +#ifdef CONFIG_DEBUG_RT_MUTEXES
  13920. +# define __RT_SPIN_INITIALIZER(name) \
  13921. + { \
  13922. + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \
  13923. + .save_state = 1, \
  13924. + .file = __FILE__, \
  13925. + .line = __LINE__ , \
  13926. + }
  13927. +#else
  13928. +# define __RT_SPIN_INITIALIZER(name) \
  13929. + { \
  13930. + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \
  13931. + .save_state = 1, \
  13932. + }
  13933. +#endif
  13934. +
  13935. +/*
  13936. +.wait_list = PLIST_HEAD_INIT_RAW((name).lock.wait_list, (name).lock.wait_lock)
  13937. +*/
  13938. +
  13939. +#define __SPIN_LOCK_UNLOCKED(name) \
  13940. + { .lock = __RT_SPIN_INITIALIZER(name.lock), \
  13941. + SPIN_DEP_MAP_INIT(name) }
  13942. +
  13943. +#define __DEFINE_SPINLOCK(name) \
  13944. + spinlock_t name = __SPIN_LOCK_UNLOCKED(name)
  13945. +
  13946. +#define DEFINE_SPINLOCK(name) \
  13947. + spinlock_t name __cacheline_aligned_in_smp = __SPIN_LOCK_UNLOCKED(name)
  13948. +
  13949. +#endif
  13950. diff -Nur linux-4.4.62.orig/include/linux/srcu.h linux-4.4.62/include/linux/srcu.h
  13951. --- linux-4.4.62.orig/include/linux/srcu.h 2017-04-18 07:15:37.000000000 +0200
  13952. +++ linux-4.4.62/include/linux/srcu.h 2017-04-18 17:38:08.198649401 +0200
  13953. @@ -84,10 +84,10 @@
  13954. void process_srcu(struct work_struct *work);
  13955. -#define __SRCU_STRUCT_INIT(name) \
  13956. +#define __SRCU_STRUCT_INIT(name, pcpu_name) \
  13957. { \
  13958. .completed = -300, \
  13959. - .per_cpu_ref = &name##_srcu_array, \
  13960. + .per_cpu_ref = &pcpu_name, \
  13961. .queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock), \
  13962. .running = false, \
  13963. .batch_queue = RCU_BATCH_INIT(name.batch_queue), \
  13964. @@ -104,7 +104,7 @@
  13965. */
  13966. #define __DEFINE_SRCU(name, is_static) \
  13967. static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\
  13968. - is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name)
  13969. + is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name##_srcu_array)
  13970. #define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */)
  13971. #define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static)
  13972. diff -Nur linux-4.4.62.orig/include/linux/suspend.h linux-4.4.62/include/linux/suspend.h
  13973. --- linux-4.4.62.orig/include/linux/suspend.h 2017-04-18 07:15:37.000000000 +0200
  13974. +++ linux-4.4.62/include/linux/suspend.h 2017-04-18 17:38:08.198649401 +0200
  13975. @@ -194,6 +194,12 @@
  13976. void (*end)(void);
  13977. };
  13978. +#if defined(CONFIG_SUSPEND) || defined(CONFIG_HIBERNATION)
  13979. +extern bool pm_in_action;
  13980. +#else
  13981. +# define pm_in_action false
  13982. +#endif
  13983. +
  13984. #ifdef CONFIG_SUSPEND
  13985. /**
  13986. * suspend_set_ops - set platform dependent suspend operations
  13987. diff -Nur linux-4.4.62.orig/include/linux/swait.h linux-4.4.62/include/linux/swait.h
  13988. --- linux-4.4.62.orig/include/linux/swait.h 1970-01-01 01:00:00.000000000 +0100
  13989. +++ linux-4.4.62/include/linux/swait.h 2017-04-18 17:38:08.198649401 +0200
  13990. @@ -0,0 +1,173 @@
  13991. +#ifndef _LINUX_SWAIT_H
  13992. +#define _LINUX_SWAIT_H
  13993. +
  13994. +#include <linux/list.h>
  13995. +#include <linux/stddef.h>
  13996. +#include <linux/spinlock.h>
  13997. +#include <asm/current.h>
  13998. +
  13999. +/*
  14000. + * Simple wait queues
  14001. + *
  14002. + * While these are very similar to the other/complex wait queues (wait.h) the
  14003. + * most important difference is that the simple waitqueue allows for
  14004. + * deterministic behaviour -- IOW it has strictly bounded IRQ and lock hold
  14005. + * times.
  14006. + *
  14007. + * In order to make this so, we had to drop a fair number of features of the
  14008. + * other waitqueue code; notably:
  14009. + *
  14010. + * - mixing INTERRUPTIBLE and UNINTERRUPTIBLE sleeps on the same waitqueue;
  14011. + * all wakeups are TASK_NORMAL in order to avoid O(n) lookups for the right
  14012. + * sleeper state.
  14013. + *
  14014. + * - the exclusive mode; because this requires preserving the list order
  14015. + * and this is hard.
  14016. + *
  14017. + * - custom wake functions; because you cannot give any guarantees about
  14018. + * random code.
  14019. + *
  14020. + * As a side effect of this; the data structures are slimmer.
  14021. + *
  14022. + * One would recommend using this wait queue where possible.
  14023. + */
  14024. +
  14025. +struct task_struct;
  14026. +
  14027. +struct swait_queue_head {
  14028. + raw_spinlock_t lock;
  14029. + struct list_head task_list;
  14030. +};
  14031. +
  14032. +struct swait_queue {
  14033. + struct task_struct *task;
  14034. + struct list_head task_list;
  14035. +};
  14036. +
  14037. +#define __SWAITQUEUE_INITIALIZER(name) { \
  14038. + .task = current, \
  14039. + .task_list = LIST_HEAD_INIT((name).task_list), \
  14040. +}
  14041. +
  14042. +#define DECLARE_SWAITQUEUE(name) \
  14043. + struct swait_queue name = __SWAITQUEUE_INITIALIZER(name)
  14044. +
  14045. +#define __SWAIT_QUEUE_HEAD_INITIALIZER(name) { \
  14046. + .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \
  14047. + .task_list = LIST_HEAD_INIT((name).task_list), \
  14048. +}
  14049. +
  14050. +#define DECLARE_SWAIT_QUEUE_HEAD(name) \
  14051. + struct swait_queue_head name = __SWAIT_QUEUE_HEAD_INITIALIZER(name)
  14052. +
  14053. +extern void __init_swait_queue_head(struct swait_queue_head *q, const char *name,
  14054. + struct lock_class_key *key);
  14055. +
  14056. +#define init_swait_queue_head(q) \
  14057. + do { \
  14058. + static struct lock_class_key __key; \
  14059. + __init_swait_queue_head((q), #q, &__key); \
  14060. + } while (0)
  14061. +
  14062. +#ifdef CONFIG_LOCKDEP
  14063. +# define __SWAIT_QUEUE_HEAD_INIT_ONSTACK(name) \
  14064. + ({ init_swait_queue_head(&name); name; })
  14065. +# define DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(name) \
  14066. + struct swait_queue_head name = __SWAIT_QUEUE_HEAD_INIT_ONSTACK(name)
  14067. +#else
  14068. +# define DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(name) \
  14069. + DECLARE_SWAIT_QUEUE_HEAD(name)
  14070. +#endif
  14071. +
  14072. +static inline int swait_active(struct swait_queue_head *q)
  14073. +{
  14074. + return !list_empty(&q->task_list);
  14075. +}
  14076. +
  14077. +extern void swake_up(struct swait_queue_head *q);
  14078. +extern void swake_up_all(struct swait_queue_head *q);
  14079. +extern void swake_up_locked(struct swait_queue_head *q);
  14080. +extern void swake_up_all_locked(struct swait_queue_head *q);
  14081. +
  14082. +extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait);
  14083. +extern void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state);
  14084. +extern long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state);
  14085. +
  14086. +extern void __finish_swait(struct swait_queue_head *q, struct swait_queue *wait);
  14087. +extern void finish_swait(struct swait_queue_head *q, struct swait_queue *wait);
  14088. +
  14089. +/* as per ___wait_event() but for swait, therefore "exclusive == 0" */
  14090. +#define ___swait_event(wq, condition, state, ret, cmd) \
  14091. +({ \
  14092. + struct swait_queue __wait; \
  14093. + long __ret = ret; \
  14094. + \
  14095. + INIT_LIST_HEAD(&__wait.task_list); \
  14096. + for (;;) { \
  14097. + long __int = prepare_to_swait_event(&wq, &__wait, state);\
  14098. + \
  14099. + if (condition) \
  14100. + break; \
  14101. + \
  14102. + if (___wait_is_interruptible(state) && __int) { \
  14103. + __ret = __int; \
  14104. + break; \
  14105. + } \
  14106. + \
  14107. + cmd; \
  14108. + } \
  14109. + finish_swait(&wq, &__wait); \
  14110. + __ret; \
  14111. +})
  14112. +
  14113. +#define __swait_event(wq, condition) \
  14114. + (void)___swait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, \
  14115. + schedule())
  14116. +
  14117. +#define swait_event(wq, condition) \
  14118. +do { \
  14119. + if (condition) \
  14120. + break; \
  14121. + __swait_event(wq, condition); \
  14122. +} while (0)
  14123. +
  14124. +#define __swait_event_timeout(wq, condition, timeout) \
  14125. + ___swait_event(wq, ___wait_cond_timeout(condition), \
  14126. + TASK_UNINTERRUPTIBLE, timeout, \
  14127. + __ret = schedule_timeout(__ret))
  14128. +
  14129. +#define swait_event_timeout(wq, condition, timeout) \
  14130. +({ \
  14131. + long __ret = timeout; \
  14132. + if (!___wait_cond_timeout(condition)) \
  14133. + __ret = __swait_event_timeout(wq, condition, timeout); \
  14134. + __ret; \
  14135. +})
  14136. +
  14137. +#define __swait_event_interruptible(wq, condition) \
  14138. + ___swait_event(wq, condition, TASK_INTERRUPTIBLE, 0, \
  14139. + schedule())
  14140. +
  14141. +#define swait_event_interruptible(wq, condition) \
  14142. +({ \
  14143. + int __ret = 0; \
  14144. + if (!(condition)) \
  14145. + __ret = __swait_event_interruptible(wq, condition); \
  14146. + __ret; \
  14147. +})
  14148. +
  14149. +#define __swait_event_interruptible_timeout(wq, condition, timeout) \
  14150. + ___swait_event(wq, ___wait_cond_timeout(condition), \
  14151. + TASK_INTERRUPTIBLE, timeout, \
  14152. + __ret = schedule_timeout(__ret))
  14153. +
  14154. +#define swait_event_interruptible_timeout(wq, condition, timeout) \
  14155. +({ \
  14156. + long __ret = timeout; \
  14157. + if (!___wait_cond_timeout(condition)) \
  14158. + __ret = __swait_event_interruptible_timeout(wq, \
  14159. + condition, timeout); \
  14160. + __ret; \
  14161. +})
  14162. +
  14163. +#endif /* _LINUX_SWAIT_H */
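A usage sketch (illustrative, not part of the patch) for the simple wait queues declared above: a consumer blocks until a condition becomes true, a producer sets it and wakes the queue. All wakeups are TASK_NORMAL and there is no exclusive mode, per the header comment. The names data_wait, data_ready, consumer_thread and producer_publish are hypothetical.

#include <linux/swait.h>

static DECLARE_SWAIT_QUEUE_HEAD(data_wait);
static bool data_ready;

static int consumer_thread(void *unused)
{
        /* sleeps interruptibly until data_ready is observed true */
        return swait_event_interruptible(data_wait, data_ready);
}

static void producer_publish(void)
{
        data_ready = true;
        swake_up(&data_wait);   /* wake one waiter; swake_up_all() wakes them all */
}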
  14164. diff -Nur linux-4.4.62.orig/include/linux/swap.h linux-4.4.62/include/linux/swap.h
  14165. --- linux-4.4.62.orig/include/linux/swap.h 2017-04-18 07:15:37.000000000 +0200
  14166. +++ linux-4.4.62/include/linux/swap.h 2017-04-18 17:38:08.198649401 +0200
  14167. @@ -11,6 +11,7 @@
  14168. #include <linux/fs.h>
  14169. #include <linux/atomic.h>
  14170. #include <linux/page-flags.h>
  14171. +#include <linux/locallock.h>
  14172. #include <asm/page.h>
  14173. struct notifier_block;
  14174. @@ -252,7 +253,8 @@
  14175. void *workingset_eviction(struct address_space *mapping, struct page *page);
  14176. bool workingset_refault(void *shadow);
  14177. void workingset_activation(struct page *page);
  14178. -extern struct list_lru workingset_shadow_nodes;
  14179. +extern struct list_lru __workingset_shadow_nodes;
  14180. +DECLARE_LOCAL_IRQ_LOCK(workingset_shadow_lock);
  14181. static inline unsigned int workingset_node_pages(struct radix_tree_node *node)
  14182. {
  14183. @@ -298,6 +300,7 @@
  14184. /* linux/mm/swap.c */
  14185. +DECLARE_LOCAL_IRQ_LOCK(swapvec_lock);
  14186. extern void lru_cache_add(struct page *);
  14187. extern void lru_cache_add_anon(struct page *page);
  14188. extern void lru_cache_add_file(struct page *page);
  14189. diff -Nur linux-4.4.62.orig/include/linux/swork.h linux-4.4.62/include/linux/swork.h
  14190. --- linux-4.4.62.orig/include/linux/swork.h 1970-01-01 01:00:00.000000000 +0100
  14191. +++ linux-4.4.62/include/linux/swork.h 2017-04-18 17:38:08.198649401 +0200
  14192. @@ -0,0 +1,24 @@
  14193. +#ifndef _LINUX_SWORK_H
  14194. +#define _LINUX_SWORK_H
  14195. +
  14196. +#include <linux/list.h>
  14197. +
  14198. +struct swork_event {
  14199. + struct list_head item;
  14200. + unsigned long flags;
  14201. + void (*func)(struct swork_event *);
  14202. +};
  14203. +
  14204. +static inline void INIT_SWORK(struct swork_event *event,
  14205. + void (*func)(struct swork_event *))
  14206. +{
  14207. + event->flags = 0;
  14208. + event->func = func;
  14209. +}
  14210. +
  14211. +bool swork_queue(struct swork_event *sev);
  14212. +
  14213. +int swork_get(void);
  14214. +void swork_put(void);
  14215. +
  14216. +#endif /* _LINUX_SWORK_H */
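A sketch (illustrative, not part of the patch) of the simple-work API declared above: swork_get()/swork_put() take and drop a reference on the worker thread, INIT_SWORK() binds a handler, and swork_queue() defers the callback to that thread's process context. The names my_event, my_handler, my_init and my_exit are hypothetical.

#include <linux/swork.h>

static void my_handler(struct swork_event *sev)
{
        /* runs later in the swork kernel thread, in process context */
}

static struct swork_event my_event;

static int my_init(void)
{
        int err = swork_get();          /* make sure the worker thread exists */

        if (err)
                return err;

        INIT_SWORK(&my_event, my_handler);
        swork_queue(&my_event);
        return 0;
}

static void my_exit(void)
{
        swork_put();                    /* drop our reference on the worker */
}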
  14217. diff -Nur linux-4.4.62.orig/include/linux/thread_info.h linux-4.4.62/include/linux/thread_info.h
  14218. --- linux-4.4.62.orig/include/linux/thread_info.h 2017-04-18 07:15:37.000000000 +0200
  14219. +++ linux-4.4.62/include/linux/thread_info.h 2017-04-18 17:38:08.198649401 +0200
  14220. @@ -102,7 +102,17 @@
  14221. #define test_thread_flag(flag) \
  14222. test_ti_thread_flag(current_thread_info(), flag)
  14223. -#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
  14224. +#ifdef CONFIG_PREEMPT_LAZY
  14225. +#define tif_need_resched() (test_thread_flag(TIF_NEED_RESCHED) || \
  14226. + test_thread_flag(TIF_NEED_RESCHED_LAZY))
  14227. +#define tif_need_resched_now() (test_thread_flag(TIF_NEED_RESCHED))
  13228. +#define tif_need_resched_lazy() (test_thread_flag(TIF_NEED_RESCHED_LAZY))
  14229. +
  14230. +#else
  14231. +#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
  14232. +#define tif_need_resched_now() test_thread_flag(TIF_NEED_RESCHED)
  14233. +#define tif_need_resched_lazy() 0
  14234. +#endif
  14235. #if defined TIF_RESTORE_SIGMASK && !defined HAVE_SET_RESTORE_SIGMASK
  14236. /*
  14237. diff -Nur linux-4.4.62.orig/include/linux/timer.h linux-4.4.62/include/linux/timer.h
  14238. --- linux-4.4.62.orig/include/linux/timer.h 2017-04-18 07:15:37.000000000 +0200
  14239. +++ linux-4.4.62/include/linux/timer.h 2017-04-18 17:38:08.198649401 +0200
  14240. @@ -225,7 +225,7 @@
  14241. extern int try_to_del_timer_sync(struct timer_list *timer);
  14242. -#ifdef CONFIG_SMP
  14243. +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
  14244. extern int del_timer_sync(struct timer_list *timer);
  14245. #else
  14246. # define del_timer_sync(t) del_timer(t)
  14247. diff -Nur linux-4.4.62.orig/include/linux/trace_events.h linux-4.4.62/include/linux/trace_events.h
  14248. --- linux-4.4.62.orig/include/linux/trace_events.h 2017-04-18 07:15:37.000000000 +0200
  14249. +++ linux-4.4.62/include/linux/trace_events.h 2017-04-18 17:38:08.198649401 +0200
  14250. @@ -66,6 +66,9 @@
  14251. unsigned char flags;
  14252. unsigned char preempt_count;
  14253. int pid;
  14254. + unsigned short migrate_disable;
  14255. + unsigned short padding;
  14256. + unsigned char preempt_lazy_count;
  14257. };
  14258. #define TRACE_EVENT_TYPE_MAX \
  14259. diff -Nur linux-4.4.62.orig/include/linux/uaccess.h linux-4.4.62/include/linux/uaccess.h
  14260. --- linux-4.4.62.orig/include/linux/uaccess.h 2017-04-18 07:15:37.000000000 +0200
  14261. +++ linux-4.4.62/include/linux/uaccess.h 2017-04-18 17:38:08.198649401 +0200
  14262. @@ -24,6 +24,7 @@
  14263. */
  14264. static inline void pagefault_disable(void)
  14265. {
  14266. + migrate_disable();
  14267. pagefault_disabled_inc();
  14268. /*
  14269. * make sure to have issued the store before a pagefault
  14270. @@ -40,6 +41,7 @@
  14271. */
  14272. barrier();
  14273. pagefault_disabled_dec();
  14274. + migrate_enable();
  14275. }
  14276. /*
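A sketch (illustrative, not part of the patch) of a pagefault-disabled user access: with the change above, the section additionally disables migration on RT, so per-CPU assumptions inside it stay valid while the task remains preemptible. The helper peek_user_word is a hypothetical name; it uses the existing __copy_from_user_inatomic() primitive.

#include <linux/uaccess.h>
#include <linux/errno.h>

static int peek_user_word(const void __user *uaddr, unsigned long *val)
{
        unsigned long left;

        pagefault_disable();    /* faults fail fast instead of sleeping; migration disabled on RT */
        left = __copy_from_user_inatomic(val, uaddr, sizeof(*val));
        pagefault_enable();

        return left ? -EFAULT : 0;
}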
  14277. diff -Nur linux-4.4.62.orig/include/linux/uprobes.h linux-4.4.62/include/linux/uprobes.h
  14278. --- linux-4.4.62.orig/include/linux/uprobes.h 2017-04-18 07:15:37.000000000 +0200
  14279. +++ linux-4.4.62/include/linux/uprobes.h 2017-04-18 17:38:08.198649401 +0200
  14280. @@ -27,6 +27,7 @@
  14281. #include <linux/errno.h>
  14282. #include <linux/rbtree.h>
  14283. #include <linux/types.h>
  14284. +#include <linux/wait.h>
  14285. struct vm_area_struct;
  14286. struct mm_struct;
  14287. diff -Nur linux-4.4.62.orig/include/linux/vmstat.h linux-4.4.62/include/linux/vmstat.h
  14288. --- linux-4.4.62.orig/include/linux/vmstat.h 2017-04-18 07:15:37.000000000 +0200
  14289. +++ linux-4.4.62/include/linux/vmstat.h 2017-04-18 17:38:08.202649555 +0200
  14290. @@ -33,7 +33,9 @@
  14291. */
  14292. static inline void __count_vm_event(enum vm_event_item item)
  14293. {
  14294. + preempt_disable_rt();
  14295. raw_cpu_inc(vm_event_states.event[item]);
  14296. + preempt_enable_rt();
  14297. }
  14298. static inline void count_vm_event(enum vm_event_item item)
  14299. @@ -43,7 +45,9 @@
  14300. static inline void __count_vm_events(enum vm_event_item item, long delta)
  14301. {
  14302. + preempt_disable_rt();
  14303. raw_cpu_add(vm_event_states.event[item], delta);
  14304. + preempt_enable_rt();
  14305. }
  14306. static inline void count_vm_events(enum vm_event_item item, long delta)
  14307. diff -Nur linux-4.4.62.orig/include/linux/wait.h linux-4.4.62/include/linux/wait.h
  14308. --- linux-4.4.62.orig/include/linux/wait.h 2017-04-18 07:15:37.000000000 +0200
  14309. +++ linux-4.4.62/include/linux/wait.h 2017-04-18 17:38:08.202649555 +0200
  14310. @@ -8,6 +8,7 @@
  14311. #include <linux/spinlock.h>
  14312. #include <asm/current.h>
  14313. #include <uapi/linux/wait.h>
  14314. +#include <linux/atomic.h>
  14315. typedef struct __wait_queue wait_queue_t;
  14316. typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, void *key);
  14317. diff -Nur linux-4.4.62.orig/include/net/dst.h linux-4.4.62/include/net/dst.h
  14318. --- linux-4.4.62.orig/include/net/dst.h 2017-04-18 07:15:37.000000000 +0200
  14319. +++ linux-4.4.62/include/net/dst.h 2017-04-18 17:38:08.202649555 +0200
  14320. @@ -437,7 +437,7 @@
  14321. static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n,
  14322. struct sk_buff *skb)
  14323. {
  14324. - const struct hh_cache *hh;
  14325. + struct hh_cache *hh;
  14326. if (dst->pending_confirm) {
  14327. unsigned long now = jiffies;
  14328. diff -Nur linux-4.4.62.orig/include/net/neighbour.h linux-4.4.62/include/net/neighbour.h
  14329. --- linux-4.4.62.orig/include/net/neighbour.h 2017-04-18 07:15:37.000000000 +0200
  14330. +++ linux-4.4.62/include/net/neighbour.h 2017-04-18 17:38:08.202649555 +0200
  14331. @@ -446,7 +446,7 @@
  14332. }
  14333. #endif
  14334. -static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb)
  14335. +static inline int neigh_hh_output(struct hh_cache *hh, struct sk_buff *skb)
  14336. {
  14337. unsigned int seq;
  14338. int hh_len;
  14339. @@ -501,7 +501,7 @@
  14340. #define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb)
  14341. -static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n,
  14342. +static inline void neigh_ha_snapshot(char *dst, struct neighbour *n,
  14343. const struct net_device *dev)
  14344. {
  14345. unsigned int seq;
  14346. diff -Nur linux-4.4.62.orig/include/net/netns/ipv4.h linux-4.4.62/include/net/netns/ipv4.h
  14347. --- linux-4.4.62.orig/include/net/netns/ipv4.h 2017-04-18 07:15:37.000000000 +0200
  14348. +++ linux-4.4.62/include/net/netns/ipv4.h 2017-04-18 17:38:08.202649555 +0200
  14349. @@ -70,6 +70,7 @@
  14350. int sysctl_icmp_echo_ignore_all;
  14351. int sysctl_icmp_echo_ignore_broadcasts;
  14352. + int sysctl_icmp_echo_sysrq;
  14353. int sysctl_icmp_ignore_bogus_error_responses;
  14354. int sysctl_icmp_ratelimit;
  14355. int sysctl_icmp_ratemask;
  14356. diff -Nur linux-4.4.62.orig/include/trace/events/hist.h linux-4.4.62/include/trace/events/hist.h
  14357. --- linux-4.4.62.orig/include/trace/events/hist.h 1970-01-01 01:00:00.000000000 +0100
  14358. +++ linux-4.4.62/include/trace/events/hist.h 2017-04-18 17:38:08.202649555 +0200
  14359. @@ -0,0 +1,73 @@
  14360. +#undef TRACE_SYSTEM
  14361. +#define TRACE_SYSTEM hist
  14362. +
  14363. +#if !defined(_TRACE_HIST_H) || defined(TRACE_HEADER_MULTI_READ)
  14364. +#define _TRACE_HIST_H
  14365. +
  14366. +#include "latency_hist.h"
  14367. +#include <linux/tracepoint.h>
  14368. +
  14369. +#if !defined(CONFIG_PREEMPT_OFF_HIST) && !defined(CONFIG_INTERRUPT_OFF_HIST)
  14370. +#define trace_preemptirqsoff_hist(a, b)
  14371. +#define trace_preemptirqsoff_hist_rcuidle(a, b)
  14372. +#else
  14373. +TRACE_EVENT(preemptirqsoff_hist,
  14374. +
  14375. + TP_PROTO(int reason, int starthist),
  14376. +
  14377. + TP_ARGS(reason, starthist),
  14378. +
  14379. + TP_STRUCT__entry(
  14380. + __field(int, reason)
  14381. + __field(int, starthist)
  14382. + ),
  14383. +
  14384. + TP_fast_assign(
  14385. + __entry->reason = reason;
  14386. + __entry->starthist = starthist;
  14387. + ),
  14388. +
  14389. + TP_printk("reason=%s starthist=%s", getaction(__entry->reason),
  14390. + __entry->starthist ? "start" : "stop")
  14391. +);
  14392. +#endif
  14393. +
  14394. +#ifndef CONFIG_MISSED_TIMER_OFFSETS_HIST
  14395. +#define trace_hrtimer_interrupt(a, b, c, d)
  14396. +#else
  14397. +TRACE_EVENT(hrtimer_interrupt,
  14398. +
  14399. + TP_PROTO(int cpu, long long offset, struct task_struct *curr,
  14400. + struct task_struct *task),
  14401. +
  14402. + TP_ARGS(cpu, offset, curr, task),
  14403. +
  14404. + TP_STRUCT__entry(
  14405. + __field(int, cpu)
  14406. + __field(long long, offset)
  14407. + __array(char, ccomm, TASK_COMM_LEN)
  14408. + __field(int, cprio)
  14409. + __array(char, tcomm, TASK_COMM_LEN)
  14410. + __field(int, tprio)
  14411. + ),
  14412. +
  14413. + TP_fast_assign(
  14414. + __entry->cpu = cpu;
  14415. + __entry->offset = offset;
  14416. + memcpy(__entry->ccomm, curr->comm, TASK_COMM_LEN);
  14417. + __entry->cprio = curr->prio;
  14418. + memcpy(__entry->tcomm, task != NULL ? task->comm : "<none>",
  14419. + task != NULL ? TASK_COMM_LEN : 7);
  14420. + __entry->tprio = task != NULL ? task->prio : -1;
  14421. + ),
  14422. +
  14423. + TP_printk("cpu=%d offset=%lld curr=%s[%d] thread=%s[%d]",
  14424. + __entry->cpu, __entry->offset, __entry->ccomm,
  14425. + __entry->cprio, __entry->tcomm, __entry->tprio)
  14426. +);
  14427. +#endif
  14428. +
  14429. +#endif /* _TRACE_HIST_H */
  14430. +
  14431. +/* This part must be outside protection */
  14432. +#include <trace/define_trace.h>
  14433. diff -Nur linux-4.4.62.orig/include/trace/events/latency_hist.h linux-4.4.62/include/trace/events/latency_hist.h
  14434. --- linux-4.4.62.orig/include/trace/events/latency_hist.h 1970-01-01 01:00:00.000000000 +0100
  14435. +++ linux-4.4.62/include/trace/events/latency_hist.h 2017-04-18 17:38:08.202649555 +0200
  14436. @@ -0,0 +1,29 @@
  14437. +#ifndef _LATENCY_HIST_H
  14438. +#define _LATENCY_HIST_H
  14439. +
  14440. +enum hist_action {
  14441. + IRQS_ON,
  14442. + PREEMPT_ON,
  14443. + TRACE_STOP,
  14444. + IRQS_OFF,
  14445. + PREEMPT_OFF,
  14446. + TRACE_START,
  14447. +};
  14448. +
  14449. +static char *actions[] = {
  14450. + "IRQS_ON",
  14451. + "PREEMPT_ON",
  14452. + "TRACE_STOP",
  14453. + "IRQS_OFF",
  14454. + "PREEMPT_OFF",
  14455. + "TRACE_START",
  14456. +};
  14457. +
  14458. +static inline char *getaction(int action)
  14459. +{
  14460. + if (action >= 0 && action < sizeof(actions)/sizeof(actions[0]))
  14461. + return actions[action];
  14462. + return "unknown";
  14463. +}
  14464. +
  14465. +#endif /* _LATENCY_HIST_H */
  14466. diff -Nur linux-4.4.62.orig/include/trace/events/writeback.h linux-4.4.62/include/trace/events/writeback.h
  14467. --- linux-4.4.62.orig/include/trace/events/writeback.h 2017-04-18 07:15:37.000000000 +0200
  14468. +++ linux-4.4.62/include/trace/events/writeback.h 2017-04-18 17:38:08.202649555 +0200
  14469. @@ -134,58 +134,28 @@
  14470. #ifdef CREATE_TRACE_POINTS
  14471. #ifdef CONFIG_CGROUP_WRITEBACK
  14472. -static inline size_t __trace_wb_cgroup_size(struct bdi_writeback *wb)
  14473. +static inline unsigned int __trace_wb_assign_cgroup(struct bdi_writeback *wb)
  14474. {
  14475. - return kernfs_path_len(wb->memcg_css->cgroup->kn) + 1;
  14476. + return wb->memcg_css->cgroup->kn->ino;
  14477. }
  14478. -static inline void __trace_wb_assign_cgroup(char *buf, struct bdi_writeback *wb)
  14479. -{
  14480. - struct cgroup *cgrp = wb->memcg_css->cgroup;
  14481. - char *path;
  14482. -
  14483. - path = cgroup_path(cgrp, buf, kernfs_path_len(cgrp->kn) + 1);
  14484. - WARN_ON_ONCE(path != buf);
  14485. -}
  14486. -
  14487. -static inline size_t __trace_wbc_cgroup_size(struct writeback_control *wbc)
  14488. -{
  14489. - if (wbc->wb)
  14490. - return __trace_wb_cgroup_size(wbc->wb);
  14491. - else
  14492. - return 2;
  14493. -}
  14494. -
  14495. -static inline void __trace_wbc_assign_cgroup(char *buf,
  14496. - struct writeback_control *wbc)
  14497. +static inline unsigned int __trace_wbc_assign_cgroup(struct writeback_control *wbc)
  14498. {
  14499. if (wbc->wb)
  14500. - __trace_wb_assign_cgroup(buf, wbc->wb);
  14501. + return __trace_wb_assign_cgroup(wbc->wb);
  14502. else
  14503. - strcpy(buf, "/");
  14504. + return -1U;
  14505. }
  14506. -
  14507. #else /* CONFIG_CGROUP_WRITEBACK */
  14508. -static inline size_t __trace_wb_cgroup_size(struct bdi_writeback *wb)
  14509. -{
  14510. - return 2;
  14511. -}
  14512. -
  14513. -static inline void __trace_wb_assign_cgroup(char *buf, struct bdi_writeback *wb)
  14514. -{
  14515. - strcpy(buf, "/");
  14516. -}
  14517. -
  14518. -static inline size_t __trace_wbc_cgroup_size(struct writeback_control *wbc)
  14519. +static inline unsigned int __trace_wb_assign_cgroup(struct bdi_writeback *wb)
  14520. {
  14521. - return 2;
  14522. + return -1U;
  14523. }
  14524. -static inline void __trace_wbc_assign_cgroup(char *buf,
  14525. - struct writeback_control *wbc)
  14526. +static inline unsigned int __trace_wbc_assign_cgroup(struct writeback_control *wbc)
  14527. {
  14528. - strcpy(buf, "/");
  14529. + return -1U;
  14530. }
  14531. #endif /* CONFIG_CGROUP_WRITEBACK */
  14532. @@ -201,7 +171,7 @@
  14533. __array(char, name, 32)
  14534. __field(unsigned long, ino)
  14535. __field(int, sync_mode)
  14536. - __dynamic_array(char, cgroup, __trace_wbc_cgroup_size(wbc))
  14537. + __field(unsigned int, cgroup_ino)
  14538. ),
  14539. TP_fast_assign(
  14540. @@ -209,14 +179,14 @@
  14541. dev_name(inode_to_bdi(inode)->dev), 32);
  14542. __entry->ino = inode->i_ino;
  14543. __entry->sync_mode = wbc->sync_mode;
  14544. - __trace_wbc_assign_cgroup(__get_str(cgroup), wbc);
  14545. + __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc);
  14546. ),
  14547. - TP_printk("bdi %s: ino=%lu sync_mode=%d cgroup=%s",
  14548. + TP_printk("bdi %s: ino=%lu sync_mode=%d cgroup_ino=%u",
  14549. __entry->name,
  14550. __entry->ino,
  14551. __entry->sync_mode,
  14552. - __get_str(cgroup)
  14553. + __entry->cgroup_ino
  14554. )
  14555. );
  14556. @@ -246,7 +216,7 @@
  14557. __field(int, range_cyclic)
  14558. __field(int, for_background)
  14559. __field(int, reason)
  14560. - __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
  14561. + __field(unsigned int, cgroup_ino)
  14562. ),
  14563. TP_fast_assign(
  14564. strncpy(__entry->name,
  14565. @@ -258,10 +228,10 @@
  14566. __entry->range_cyclic = work->range_cyclic;
  14567. __entry->for_background = work->for_background;
  14568. __entry->reason = work->reason;
  14569. - __trace_wb_assign_cgroup(__get_str(cgroup), wb);
  14570. + __entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
  14571. ),
  14572. TP_printk("bdi %s: sb_dev %d:%d nr_pages=%ld sync_mode=%d "
  14573. - "kupdate=%d range_cyclic=%d background=%d reason=%s cgroup=%s",
  14574. + "kupdate=%d range_cyclic=%d background=%d reason=%s cgroup_ino=%u",
  14575. __entry->name,
  14576. MAJOR(__entry->sb_dev), MINOR(__entry->sb_dev),
  14577. __entry->nr_pages,
  14578. @@ -270,7 +240,7 @@
  14579. __entry->range_cyclic,
  14580. __entry->for_background,
  14581. __print_symbolic(__entry->reason, WB_WORK_REASON),
  14582. - __get_str(cgroup)
  14583. + __entry->cgroup_ino
  14584. )
  14585. );
  14586. #define DEFINE_WRITEBACK_WORK_EVENT(name) \
  14587. @@ -300,15 +270,15 @@
  14588. TP_ARGS(wb),
  14589. TP_STRUCT__entry(
  14590. __array(char, name, 32)
  14591. - __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
  14592. + __field(unsigned int, cgroup_ino)
  14593. ),
  14594. TP_fast_assign(
  14595. strncpy(__entry->name, dev_name(wb->bdi->dev), 32);
  14596. - __trace_wb_assign_cgroup(__get_str(cgroup), wb);
  14597. + __entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
  14598. ),
  14599. - TP_printk("bdi %s: cgroup=%s",
  14600. + TP_printk("bdi %s: cgroup_ino=%u",
  14601. __entry->name,
  14602. - __get_str(cgroup)
  14603. + __entry->cgroup_ino
  14604. )
  14605. );
  14606. #define DEFINE_WRITEBACK_EVENT(name) \
  14607. @@ -347,7 +317,7 @@
  14608. __field(int, range_cyclic)
  14609. __field(long, range_start)
  14610. __field(long, range_end)
  14611. - __dynamic_array(char, cgroup, __trace_wbc_cgroup_size(wbc))
  14612. + __field(unsigned int, cgroup_ino)
  14613. ),
  14614. TP_fast_assign(
  14615. @@ -361,12 +331,12 @@
  14616. __entry->range_cyclic = wbc->range_cyclic;
  14617. __entry->range_start = (long)wbc->range_start;
  14618. __entry->range_end = (long)wbc->range_end;
  14619. - __trace_wbc_assign_cgroup(__get_str(cgroup), wbc);
  14620. + __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc);
  14621. ),
  14622. TP_printk("bdi %s: towrt=%ld skip=%ld mode=%d kupd=%d "
  14623. "bgrd=%d reclm=%d cyclic=%d "
  14624. - "start=0x%lx end=0x%lx cgroup=%s",
  14625. + "start=0x%lx end=0x%lx cgroup_ino=%u",
  14626. __entry->name,
  14627. __entry->nr_to_write,
  14628. __entry->pages_skipped,
  14629. @@ -377,7 +347,7 @@
  14630. __entry->range_cyclic,
  14631. __entry->range_start,
  14632. __entry->range_end,
  14633. - __get_str(cgroup)
  14634. + __entry->cgroup_ino
  14635. )
  14636. )
  14637. @@ -398,7 +368,7 @@
  14638. __field(long, age)
  14639. __field(int, moved)
  14640. __field(int, reason)
  14641. - __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
  14642. + __field(unsigned int, cgroup_ino)
  14643. ),
  14644. TP_fast_assign(
  14645. unsigned long *older_than_this = work->older_than_this;
  14646. @@ -408,15 +378,15 @@
  14647. (jiffies - *older_than_this) * 1000 / HZ : -1;
  14648. __entry->moved = moved;
  14649. __entry->reason = work->reason;
  14650. - __trace_wb_assign_cgroup(__get_str(cgroup), wb);
  14651. + __entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
  14652. ),
  14653. - TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s cgroup=%s",
  14654. + TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s cgroup_ino=%u",
  14655. __entry->name,
  14656. __entry->older, /* older_than_this in jiffies */
  14657. __entry->age, /* older_than_this in relative milliseconds */
  14658. __entry->moved,
  14659. __print_symbolic(__entry->reason, WB_WORK_REASON),
  14660. - __get_str(cgroup)
  14661. + __entry->cgroup_ino
  14662. )
  14663. );
  14664. @@ -484,7 +454,7 @@
  14665. __field(unsigned long, dirty_ratelimit)
  14666. __field(unsigned long, task_ratelimit)
  14667. __field(unsigned long, balanced_dirty_ratelimit)
  14668. - __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
  14669. + __field(unsigned int, cgroup_ino)
  14670. ),
  14671. TP_fast_assign(
  14672. @@ -496,13 +466,13 @@
  14673. __entry->task_ratelimit = KBps(task_ratelimit);
  14674. __entry->balanced_dirty_ratelimit =
  14675. KBps(wb->balanced_dirty_ratelimit);
  14676. - __trace_wb_assign_cgroup(__get_str(cgroup), wb);
  14677. + __entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
  14678. ),
  14679. TP_printk("bdi %s: "
  14680. "write_bw=%lu awrite_bw=%lu dirty_rate=%lu "
  14681. "dirty_ratelimit=%lu task_ratelimit=%lu "
  14682. - "balanced_dirty_ratelimit=%lu cgroup=%s",
  14683. + "balanced_dirty_ratelimit=%lu cgroup_ino=%u",
  14684. __entry->bdi,
  14685. __entry->write_bw, /* write bandwidth */
  14686. __entry->avg_write_bw, /* avg write bandwidth */
  14687. @@ -510,7 +480,7 @@
  14688. __entry->dirty_ratelimit, /* base ratelimit */
  14689. __entry->task_ratelimit, /* ratelimit with position control */
  14690. __entry->balanced_dirty_ratelimit, /* the balanced ratelimit */
  14691. - __get_str(cgroup)
  14692. + __entry->cgroup_ino
  14693. )
  14694. );
  14695. @@ -548,7 +518,7 @@
  14696. __field( long, pause)
  14697. __field(unsigned long, period)
  14698. __field( long, think)
  14699. - __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
  14700. + __field(unsigned int, cgroup_ino)
  14701. ),
  14702. TP_fast_assign(
  14703. @@ -571,7 +541,7 @@
  14704. __entry->period = period * 1000 / HZ;
  14705. __entry->pause = pause * 1000 / HZ;
  14706. __entry->paused = (jiffies - start_time) * 1000 / HZ;
  14707. - __trace_wb_assign_cgroup(__get_str(cgroup), wb);
  14708. + __entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
  14709. ),
  14710. @@ -580,7 +550,7 @@
  14711. "bdi_setpoint=%lu bdi_dirty=%lu "
  14712. "dirty_ratelimit=%lu task_ratelimit=%lu "
  14713. "dirtied=%u dirtied_pause=%u "
  14714. - "paused=%lu pause=%ld period=%lu think=%ld cgroup=%s",
  14715. + "paused=%lu pause=%ld period=%lu think=%ld cgroup_ino=%u",
  14716. __entry->bdi,
  14717. __entry->limit,
  14718. __entry->setpoint,
  14719. @@ -595,7 +565,7 @@
  14720. __entry->pause, /* ms */
  14721. __entry->period, /* ms */
  14722. __entry->think, /* ms */
  14723. - __get_str(cgroup)
  14724. + __entry->cgroup_ino
  14725. )
  14726. );
  14727. @@ -609,8 +579,7 @@
  14728. __field(unsigned long, ino)
  14729. __field(unsigned long, state)
  14730. __field(unsigned long, dirtied_when)
  14731. - __dynamic_array(char, cgroup,
  14732. - __trace_wb_cgroup_size(inode_to_wb(inode)))
  14733. + __field(unsigned int, cgroup_ino)
  14734. ),
  14735. TP_fast_assign(
  14736. @@ -619,16 +588,16 @@
  14737. __entry->ino = inode->i_ino;
  14738. __entry->state = inode->i_state;
  14739. __entry->dirtied_when = inode->dirtied_when;
  14740. - __trace_wb_assign_cgroup(__get_str(cgroup), inode_to_wb(inode));
  14741. + __entry->cgroup_ino = __trace_wb_assign_cgroup(inode_to_wb(inode));
  14742. ),
  14743. - TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu cgroup=%s",
  14744. + TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu cgroup_ino=%u",
  14745. __entry->name,
  14746. __entry->ino,
  14747. show_inode_state(__entry->state),
  14748. __entry->dirtied_when,
  14749. (jiffies - __entry->dirtied_when) / HZ,
  14750. - __get_str(cgroup)
  14751. + __entry->cgroup_ino
  14752. )
  14753. );
  14754. @@ -684,7 +653,7 @@
  14755. __field(unsigned long, writeback_index)
  14756. __field(long, nr_to_write)
  14757. __field(unsigned long, wrote)
  14758. - __dynamic_array(char, cgroup, __trace_wbc_cgroup_size(wbc))
  14759. + __field(unsigned int, cgroup_ino)
  14760. ),
  14761. TP_fast_assign(
  14762. @@ -696,11 +665,11 @@
  14763. __entry->writeback_index = inode->i_mapping->writeback_index;
  14764. __entry->nr_to_write = nr_to_write;
  14765. __entry->wrote = nr_to_write - wbc->nr_to_write;
  14766. - __trace_wbc_assign_cgroup(__get_str(cgroup), wbc);
  14767. + __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc);
  14768. ),
  14769. TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu "
  14770. - "index=%lu to_write=%ld wrote=%lu cgroup=%s",
  14771. + "index=%lu to_write=%ld wrote=%lu cgroup_ino=%u",
  14772. __entry->name,
  14773. __entry->ino,
  14774. show_inode_state(__entry->state),
  14775. @@ -709,7 +678,7 @@
  14776. __entry->writeback_index,
  14777. __entry->nr_to_write,
  14778. __entry->wrote,
  14779. - __get_str(cgroup)
  14780. + __entry->cgroup_ino
  14781. )
  14782. );
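The tracepoint hunks above replace the variable-length cgroup path string with a fixed-size cgroup inode number, so __trace_wb_assign_cgroup()/__trace_wbc_assign_cgroup() must now return an unsigned int instead of filling a buffer; the helper is presumably redefined in an earlier hunk of this same writeback.h diff. As a rough, illustrative sketch only (field names as in the 4.4 writeback code, not a verbatim copy of the patch), it boils down to:

	/* Sketch only: map a bdi_writeback to the kernfs inode number of its
	 * memory cgroup, so the tracepoints can log a fixed-size integer. */
	static inline unsigned int __trace_wb_assign_cgroup(struct bdi_writeback *wb)
	{
	#ifdef CONFIG_CGROUP_WRITEBACK
		return wb->memcg_css->cgroup->kn->ino;
	#else
		return -1U;
	#endif
	}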
  14783. diff -Nur linux-4.4.62.orig/init/Kconfig linux-4.4.62/init/Kconfig
  14784. --- linux-4.4.62.orig/init/Kconfig 2017-04-18 07:15:37.000000000 +0200
  14785. +++ linux-4.4.62/init/Kconfig 2017-04-18 17:38:08.202649555 +0200
  14786. @@ -498,7 +498,7 @@
  14787. config RCU_EXPERT
  14788. bool "Make expert-level adjustments to RCU configuration"
  14789. - default n
  14790. + default y if PREEMPT_RT_FULL
  14791. help
  14792. This option needs to be enabled if you wish to make
  14793. expert-level adjustments to RCU configuration. By default,
  14794. @@ -614,7 +614,7 @@
  14795. config RCU_FAST_NO_HZ
  14796. bool "Accelerate last non-dyntick-idle CPU's grace periods"
  14797. - depends on NO_HZ_COMMON && SMP && RCU_EXPERT
  14798. + depends on NO_HZ_COMMON && SMP && RCU_EXPERT && !PREEMPT_RT_FULL
  14799. default n
  14800. help
  14801. This option permits CPUs to enter dynticks-idle state even if
  14802. @@ -641,7 +641,7 @@
  14803. config RCU_BOOST
  14804. bool "Enable RCU priority boosting"
  14805. depends on RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT
  14806. - default n
  14807. + default y if PREEMPT_RT_FULL
  14808. help
  14809. This option boosts the priority of preempted RCU readers that
  14810. block the current preemptible RCU grace period for too long.
  14811. @@ -1106,6 +1106,7 @@
  14812. config RT_GROUP_SCHED
  14813. bool "Group scheduling for SCHED_RR/FIFO"
  14814. depends on CGROUP_SCHED
  14815. + depends on !PREEMPT_RT_FULL
  14816. default n
  14817. help
  14818. This feature lets you explicitly allocate real CPU bandwidth
  14819. @@ -1719,6 +1720,7 @@
  14820. config SLAB
  14821. bool "SLAB"
  14822. + depends on !PREEMPT_RT_FULL
  14823. help
  14824. The regular slab allocator that is established and known to work
  14825. well in all environments. It organizes cache hot objects in
  14826. @@ -1737,6 +1739,7 @@
  14827. config SLOB
  14828. depends on EXPERT
  14829. bool "SLOB (Simple Allocator)"
  14830. + depends on !PREEMPT_RT_FULL
  14831. help
  14832. SLOB replaces the stock allocator with a drastically simpler
  14833. allocator. SLOB is generally more space efficient but
  14834. @@ -1746,7 +1749,7 @@
  14835. config SLUB_CPU_PARTIAL
  14836. default y
  14837. - depends on SLUB && SMP
  14838. + depends on SLUB && SMP && !PREEMPT_RT_FULL
  14839. bool "SLUB per cpu partial cache"
  14840. help
  14841. Per cpu partial caches accellerate objects allocation and freeing
  14842. diff -Nur linux-4.4.62.orig/init/main.c linux-4.4.62/init/main.c
  14843. --- linux-4.4.62.orig/init/main.c 2017-04-18 07:15:37.000000000 +0200
  14844. +++ linux-4.4.62/init/main.c 2017-04-18 17:38:08.202649555 +0200
  14845. @@ -530,6 +530,7 @@
  14846. setup_command_line(command_line);
  14847. setup_nr_cpu_ids();
  14848. setup_per_cpu_areas();
  14849. + softirq_early_init();
  14850. smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
  14851. build_all_zonelists(NULL, NULL);
  14852. diff -Nur linux-4.4.62.orig/init/Makefile linux-4.4.62/init/Makefile
  14853. --- linux-4.4.62.orig/init/Makefile 2017-04-18 07:15:37.000000000 +0200
  14854. +++ linux-4.4.62/init/Makefile 2017-04-18 17:38:08.202649555 +0200
  14855. @@ -33,4 +33,4 @@
  14856. include/generated/compile.h: FORCE
  14857. @$($(quiet)chk_compile.h)
  14858. $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \
  14859. - "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CC) $(KBUILD_CFLAGS)"
  14860. + "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CONFIG_PREEMPT_RT_FULL)" "$(CC) $(KBUILD_CFLAGS)"
  14861. diff -Nur linux-4.4.62.orig/ipc/msg.c linux-4.4.62/ipc/msg.c
  14862. --- linux-4.4.62.orig/ipc/msg.c 2017-04-18 07:15:37.000000000 +0200
  14863. +++ linux-4.4.62/ipc/msg.c 2017-04-18 17:38:08.202649555 +0200
  14864. @@ -183,20 +183,14 @@
  14865. }
  14866. }
  14867. -static void expunge_all(struct msg_queue *msq, int res)
  14868. +static void expunge_all(struct msg_queue *msq, int res,
  14869. + struct wake_q_head *wake_q)
  14870. {
  14871. struct msg_receiver *msr, *t;
  14872. list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
  14873. - msr->r_msg = NULL; /* initialize expunge ordering */
  14874. - wake_up_process(msr->r_tsk);
  14875. - /*
  14876. - * Ensure that the wakeup is visible before setting r_msg as
  14877. - * the receiving end depends on it: either spinning on a nil,
  14878. - * or dealing with -EAGAIN cases. See lockless receive part 1
  14879. - * and 2 in do_msgrcv().
  14880. - */
  14881. - smp_wmb(); /* barrier (B) */
  14882. +
  14883. + wake_q_add(wake_q, msr->r_tsk);
  14884. msr->r_msg = ERR_PTR(res);
  14885. }
  14886. }
  14887. @@ -213,11 +207,13 @@
  14888. {
  14889. struct msg_msg *msg, *t;
  14890. struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
  14891. + WAKE_Q(wake_q);
  14892. - expunge_all(msq, -EIDRM);
  14893. + expunge_all(msq, -EIDRM, &wake_q);
  14894. ss_wakeup(&msq->q_senders, 1);
  14895. msg_rmid(ns, msq);
  14896. ipc_unlock_object(&msq->q_perm);
  14897. + wake_up_q(&wake_q);
  14898. rcu_read_unlock();
  14899. list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
  14900. @@ -342,6 +338,7 @@
  14901. struct kern_ipc_perm *ipcp;
  14902. struct msqid64_ds uninitialized_var(msqid64);
  14903. struct msg_queue *msq;
  14904. + WAKE_Q(wake_q);
  14905. int err;
  14906. if (cmd == IPC_SET) {
  14907. @@ -389,7 +386,7 @@
  14908. /* sleeping receivers might be excluded by
  14909. * stricter permissions.
  14910. */
  14911. - expunge_all(msq, -EAGAIN);
  14912. + expunge_all(msq, -EAGAIN, &wake_q);
  14913. /* sleeping senders might be able to send
  14914. * due to a larger queue size.
  14915. */
  14916. @@ -402,6 +399,7 @@
  14917. out_unlock0:
  14918. ipc_unlock_object(&msq->q_perm);
  14919. + wake_up_q(&wake_q);
  14920. out_unlock1:
  14921. rcu_read_unlock();
  14922. out_up:
  14923. @@ -566,7 +564,8 @@
  14924. return 0;
  14925. }
  14926. -static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg)
  14927. +static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg,
  14928. + struct wake_q_head *wake_q)
  14929. {
  14930. struct msg_receiver *msr, *t;
  14931. @@ -577,27 +576,13 @@
  14932. list_del(&msr->r_list);
  14933. if (msr->r_maxsize < msg->m_ts) {
  14934. - /* initialize pipelined send ordering */
  14935. - msr->r_msg = NULL;
  14936. - wake_up_process(msr->r_tsk);
  14937. - /* barrier (B) see barrier comment below */
  14938. - smp_wmb();
  14939. + wake_q_add(wake_q, msr->r_tsk);
  14940. msr->r_msg = ERR_PTR(-E2BIG);
  14941. } else {
  14942. - msr->r_msg = NULL;
  14943. msq->q_lrpid = task_pid_vnr(msr->r_tsk);
  14944. msq->q_rtime = get_seconds();
  14945. - wake_up_process(msr->r_tsk);
  14946. - /*
  14947. - * Ensure that the wakeup is visible before
  14948. - * setting r_msg, as the receiving can otherwise
  14949. - * exit - once r_msg is set, the receiver can
  14950. - * continue. See lockless receive part 1 and 2
  14951. - * in do_msgrcv(). Barrier (B).
  14952. - */
  14953. - smp_wmb();
  14954. + wake_q_add(wake_q, msr->r_tsk);
  14955. msr->r_msg = msg;
  14956. -
  14957. return 1;
  14958. }
  14959. }
  14960. @@ -613,6 +598,7 @@
  14961. struct msg_msg *msg;
  14962. int err;
  14963. struct ipc_namespace *ns;
  14964. + WAKE_Q(wake_q);
  14965. ns = current->nsproxy->ipc_ns;
  14966. @@ -698,7 +684,7 @@
  14967. msq->q_lspid = task_tgid_vnr(current);
  14968. msq->q_stime = get_seconds();
  14969. - if (!pipelined_send(msq, msg)) {
  14970. + if (!pipelined_send(msq, msg, &wake_q)) {
  14971. /* no one is waiting for this message, enqueue it */
  14972. list_add_tail(&msg->m_list, &msq->q_messages);
  14973. msq->q_cbytes += msgsz;
  14974. @@ -712,6 +698,7 @@
  14975. out_unlock0:
  14976. ipc_unlock_object(&msq->q_perm);
  14977. + wake_up_q(&wake_q);
  14978. out_unlock1:
  14979. rcu_read_unlock();
  14980. if (msg != NULL)
  14981. @@ -932,57 +919,25 @@
  14982. rcu_read_lock();
  14983. /* Lockless receive, part 2:
  14984. - * Wait until pipelined_send or expunge_all are outside of
  14985. - * wake_up_process(). There is a race with exit(), see
  14986. - * ipc/mqueue.c for the details. The correct serialization
  14987. - * ensures that a receiver cannot continue without the wakeup
  14988. - * being visibible _before_ setting r_msg:
  14989. + * The work in pipelined_send() and expunge_all():
  14990. + * - Set pointer to message
  14991. + * - Queue the receiver task for later wakeup
  14992. + * - Wake up the process after the lock is dropped.
  14993. *
  14994. - * CPU 0 CPU 1
  14995. - * <loop receiver>
  14996. - * smp_rmb(); (A) <-- pair -. <waker thread>
  14997. - * <load ->r_msg> | msr->r_msg = NULL;
  14998. - * | wake_up_process();
  14999. - * <continue> `------> smp_wmb(); (B)
  15000. - * msr->r_msg = msg;
  15001. - *
  15002. - * Where (A) orders the message value read and where (B) orders
  15003. - * the write to the r_msg -- done in both pipelined_send and
  15004. - * expunge_all.
  15005. + * Should the process wake up before this wakeup (due to a
  15006. + * signal) it will either see the message and continue …
  15007. */
  15008. - for (;;) {
  15009. - /*
  15010. - * Pairs with writer barrier in pipelined_send
  15011. - * or expunge_all.
  15012. - */
  15013. - smp_rmb(); /* barrier (A) */
  15014. - msg = (struct msg_msg *)msr_d.r_msg;
  15015. - if (msg)
  15016. - break;
  15017. - /*
  15018. - * The cpu_relax() call is a compiler barrier
  15019. - * which forces everything in this loop to be
  15020. - * re-loaded.
  15021. - */
  15022. - cpu_relax();
  15023. - }
  15024. -
  15025. - /* Lockless receive, part 3:
  15026. - * If there is a message or an error then accept it without
  15027. - * locking.
  15028. - */
  15029. + msg = (struct msg_msg *)msr_d.r_msg;
  15030. if (msg != ERR_PTR(-EAGAIN))
  15031. goto out_unlock1;
  15032. - /* Lockless receive, part 3:
  15033. - * Acquire the queue spinlock.
  15034. - */
  15035. + /*
  15036. + * … or see -EAGAIN, acquire the lock to check the message
  15037. + * again.
  15038. + */
  15039. ipc_lock_object(&msq->q_perm);
  15040. - /* Lockless receive, part 4:
  15041. - * Repeat test after acquiring the spinlock.
  15042. - */
  15043. msg = (struct msg_msg *)msr_d.r_msg;
  15044. if (msg != ERR_PTR(-EAGAIN))
  15045. goto out_unlock0;
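The ipc/msg.c changes above replace the open-coded wake_up_process() plus smp_wmb() ordering with the wake_q machinery: receivers are collected on an on-stack wake queue while the ipc object lock is held and are only woken after the lock has been dropped, which is what allows the simplified "Lockless receive, part 2" comment. A minimal sketch of that pattern, using a hypothetical my_waiter structure rather than the real msg_receiver (illustration only, against the 4.4 wake_q API):

	#include <linux/sched.h>	/* WAKE_Q, wake_q_add(), wake_up_q() */
	#include <linux/spinlock.h>
	#include <linux/list.h>

	struct my_waiter {			/* hypothetical waiter bookkeeping */
		struct list_head	list;
		struct task_struct	*task;
		void			*result;
	};

	static void wake_all_waiters(struct list_head *waiters, void *result,
				     spinlock_t *lock)
	{
		struct my_waiter *w, *tmp;
		WAKE_Q(wake_q);			/* on-stack wake queue */

		spin_lock(lock);
		list_for_each_entry_safe(w, tmp, waiters, list) {
			list_del(&w->list);
			wake_q_add(&wake_q, w->task);	/* takes a task reference */
			w->result = result;
		}
		spin_unlock(lock);

		wake_up_q(&wake_q);	/* perform the wakeups outside the lock */
	}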
  15046. diff -Nur linux-4.4.62.orig/ipc/sem.c linux-4.4.62/ipc/sem.c
  15047. --- linux-4.4.62.orig/ipc/sem.c 2017-04-18 07:15:37.000000000 +0200
  15048. +++ linux-4.4.62/ipc/sem.c 2017-04-18 17:38:08.202649555 +0200
  15049. @@ -708,6 +708,13 @@
  15050. static void wake_up_sem_queue_prepare(struct list_head *pt,
  15051. struct sem_queue *q, int error)
  15052. {
  15053. +#ifdef CONFIG_PREEMPT_RT_BASE
  15054. + struct task_struct *p = q->sleeper;
  15055. + get_task_struct(p);
  15056. + q->status = error;
  15057. + wake_up_process(p);
  15058. + put_task_struct(p);
  15059. +#else
  15060. if (list_empty(pt)) {
  15061. /*
  15062. * Hold preempt off so that we don't get preempted and have the
  15063. @@ -719,6 +726,7 @@
  15064. q->pid = error;
  15065. list_add_tail(&q->list, pt);
  15066. +#endif
  15067. }
  15068. /**
  15069. @@ -732,6 +740,7 @@
  15070. */
  15071. static void wake_up_sem_queue_do(struct list_head *pt)
  15072. {
  15073. +#ifndef CONFIG_PREEMPT_RT_BASE
  15074. struct sem_queue *q, *t;
  15075. int did_something;
  15076. @@ -744,6 +753,7 @@
  15077. }
  15078. if (did_something)
  15079. preempt_enable();
  15080. +#endif
  15081. }
  15082. static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
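The ipc/sem.c hunk takes a different route than the wake_q conversion in msg.c: the !PREEMPT_RT_BASE path keeps the existing scheme of deferring wakeups onto a list that wake_up_sem_queue_do() later drains under preempt_disable(), while the PREEMPT_RT_BASE path simply pins the sleeper with get_task_struct(), stores the status and wakes it immediately, so no preemption-disabled region is needed at all.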
  15083. diff -Nur linux-4.4.62.orig/kernel/cgroup.c linux-4.4.62/kernel/cgroup.c
  15084. --- linux-4.4.62.orig/kernel/cgroup.c 2017-04-18 07:15:37.000000000 +0200
  15085. +++ linux-4.4.62/kernel/cgroup.c 2017-04-18 17:38:08.202649555 +0200
  15086. @@ -4740,10 +4740,10 @@
  15087. queue_work(cgroup_destroy_wq, &css->destroy_work);
  15088. }
  15089. -static void css_release_work_fn(struct work_struct *work)
  15090. +static void css_release_work_fn(struct swork_event *sev)
  15091. {
  15092. struct cgroup_subsys_state *css =
  15093. - container_of(work, struct cgroup_subsys_state, destroy_work);
  15094. + container_of(sev, struct cgroup_subsys_state, destroy_swork);
  15095. struct cgroup_subsys *ss = css->ss;
  15096. struct cgroup *cgrp = css->cgroup;
  15097. @@ -4782,8 +4782,8 @@
  15098. struct cgroup_subsys_state *css =
  15099. container_of(ref, struct cgroup_subsys_state, refcnt);
  15100. - INIT_WORK(&css->destroy_work, css_release_work_fn);
  15101. - queue_work(cgroup_destroy_wq, &css->destroy_work);
  15102. + INIT_SWORK(&css->destroy_swork, css_release_work_fn);
  15103. + swork_queue(&css->destroy_swork);
  15104. }
  15105. static void init_and_link_css(struct cgroup_subsys_state *css,
  15106. @@ -5400,6 +5400,7 @@
  15107. */
  15108. cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
  15109. BUG_ON(!cgroup_destroy_wq);
  15110. + BUG_ON(swork_get());
  15111. /*
  15112. * Used to destroy pidlists and separate to serve as flush domain.
  15113. diff -Nur linux-4.4.62.orig/kernel/cpu.c linux-4.4.62/kernel/cpu.c
  15114. --- linux-4.4.62.orig/kernel/cpu.c 2017-04-18 07:15:37.000000000 +0200
  15115. +++ linux-4.4.62/kernel/cpu.c 2017-04-18 17:38:08.202649555 +0200
  15116. @@ -75,8 +75,8 @@
  15117. #endif
  15118. } cpu_hotplug = {
  15119. .active_writer = NULL,
  15120. - .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
  15121. .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
  15122. + .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
  15123. #ifdef CONFIG_DEBUG_LOCK_ALLOC
  15124. .dep_map = {.name = "cpu_hotplug.lock" },
  15125. #endif
  15126. @@ -89,6 +89,289 @@
  15127. #define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map)
  15128. #define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map)
  15129. +/**
  15130. + * hotplug_pcp - per cpu hotplug descriptor
  15131. + * @unplug: set when pin_current_cpu() needs to sync tasks
  15132. + * @sync_tsk: the task that waits for tasks to finish pinned sections
  15133. + * @refcount: counter of tasks in pinned sections
  15134. + * @grab_lock: set when the tasks entering pinned sections should wait
  15135. + * @synced: notifier for @sync_tsk to tell cpu_down it's finished
  15136. + * @mutex: the mutex to make tasks wait (used when @grab_lock is true)
  15137. + * @mutex_init: zero if the mutex hasn't been initialized yet.
  15138. + *
  15139. + * Although @unplug and @sync_tsk may point to the same task, the @unplug
  15140. + * is used as a flag and still exists after @sync_tsk has exited and
  15141. + * @sync_tsk set to NULL.
  15142. + */
  15143. +struct hotplug_pcp {
  15144. + struct task_struct *unplug;
  15145. + struct task_struct *sync_tsk;
  15146. + int refcount;
  15147. + int grab_lock;
  15148. + struct completion synced;
  15149. + struct completion unplug_wait;
  15150. +#ifdef CONFIG_PREEMPT_RT_FULL
  15151. + /*
  15152. + * Note, on PREEMPT_RT, the hotplug lock must save the state of
  15153. + * the task, otherwise the mutex will cause the task to fail
  15154. + * to sleep when required. (Because it's called from migrate_disable())
  15155. + *
  15156. + * The spinlock_t on PREEMPT_RT is a mutex that saves the task's
  15157. + * state.
  15158. + */
  15159. + spinlock_t lock;
  15160. +#else
  15161. + struct mutex mutex;
  15162. +#endif
  15163. + int mutex_init;
  15164. +};
  15165. +
  15166. +#ifdef CONFIG_PREEMPT_RT_FULL
  15167. +# define hotplug_lock(hp) rt_spin_lock__no_mg(&(hp)->lock)
  15168. +# define hotplug_unlock(hp) rt_spin_unlock__no_mg(&(hp)->lock)
  15169. +#else
  15170. +# define hotplug_lock(hp) mutex_lock(&(hp)->mutex)
  15171. +# define hotplug_unlock(hp) mutex_unlock(&(hp)->mutex)
  15172. +#endif
  15173. +
  15174. +static DEFINE_PER_CPU(struct hotplug_pcp, hotplug_pcp);
  15175. +
  15176. +/**
  15177. + * pin_current_cpu - Prevent the current cpu from being unplugged
  15178. + *
  15179. + * Lightweight version of get_online_cpus() to prevent cpu from being
  15180. + * unplugged when code runs in a migration disabled region.
  15181. + *
  15182. + * Must be called with preemption disabled (preempt_count = 1)!
  15183. + */
  15184. +void pin_current_cpu(void)
  15185. +{
  15186. + struct hotplug_pcp *hp;
  15187. + int force = 0;
  15188. +
  15189. +retry:
  15190. + hp = this_cpu_ptr(&hotplug_pcp);
  15191. +
  15192. + if (!hp->unplug || hp->refcount || force || preempt_count() > 1 ||
  15193. + hp->unplug == current) {
  15194. + hp->refcount++;
  15195. + return;
  15196. + }
  15197. + if (hp->grab_lock) {
  15198. + preempt_enable();
  15199. + hotplug_lock(hp);
  15200. + hotplug_unlock(hp);
  15201. + } else {
  15202. + preempt_enable();
  15203. + /*
  15204. + * Try to push this task off of this CPU.
  15205. + */
  15206. + if (!migrate_me()) {
  15207. + preempt_disable();
  15208. + hp = this_cpu_ptr(&hotplug_pcp);
  15209. + if (!hp->grab_lock) {
  15210. + /*
15211. + * Just let it continue, it's already pinned
  15212. + * or about to sleep.
  15213. + */
  15214. + force = 1;
  15215. + goto retry;
  15216. + }
  15217. + preempt_enable();
  15218. + }
  15219. + }
  15220. + preempt_disable();
  15221. + goto retry;
  15222. +}
  15223. +
  15224. +/**
  15225. + * unpin_current_cpu - Allow unplug of current cpu
  15226. + *
  15227. + * Must be called with preemption or interrupts disabled!
  15228. + */
  15229. +void unpin_current_cpu(void)
  15230. +{
  15231. + struct hotplug_pcp *hp = this_cpu_ptr(&hotplug_pcp);
  15232. +
  15233. + WARN_ON(hp->refcount <= 0);
  15234. +
  15235. + /* This is safe. sync_unplug_thread is pinned to this cpu */
  15236. + if (!--hp->refcount && hp->unplug && hp->unplug != current)
  15237. + wake_up_process(hp->unplug);
  15238. +}
  15239. +
  15240. +static void wait_for_pinned_cpus(struct hotplug_pcp *hp)
  15241. +{
  15242. + set_current_state(TASK_UNINTERRUPTIBLE);
  15243. + while (hp->refcount) {
  15244. + schedule_preempt_disabled();
  15245. + set_current_state(TASK_UNINTERRUPTIBLE);
  15246. + }
  15247. +}
  15248. +
  15249. +static int sync_unplug_thread(void *data)
  15250. +{
  15251. + struct hotplug_pcp *hp = data;
  15252. +
  15253. + wait_for_completion(&hp->unplug_wait);
  15254. + preempt_disable();
  15255. + hp->unplug = current;
  15256. + wait_for_pinned_cpus(hp);
  15257. +
  15258. + /*
  15259. + * This thread will synchronize the cpu_down() with threads
  15260. + * that have pinned the CPU. When the pinned CPU count reaches
  15261. + * zero, we inform the cpu_down code to continue to the next step.
  15262. + */
  15263. + set_current_state(TASK_UNINTERRUPTIBLE);
  15264. + preempt_enable();
  15265. + complete(&hp->synced);
  15266. +
  15267. + /*
  15268. + * If all succeeds, the next step will need tasks to wait till
  15269. + * the CPU is offline before continuing. To do this, the grab_lock
  15270. + * is set and tasks going into pin_current_cpu() will block on the
  15271. + * mutex. But we still need to wait for those that are already in
  15272. + * pinned CPU sections. If the cpu_down() failed, the kthread_should_stop()
  15273. + * will kick this thread out.
  15274. + */
  15275. + while (!hp->grab_lock && !kthread_should_stop()) {
  15276. + schedule();
  15277. + set_current_state(TASK_UNINTERRUPTIBLE);
  15278. + }
  15279. +
  15280. + /* Make sure grab_lock is seen before we see a stale completion */
  15281. + smp_mb();
  15282. +
  15283. + /*
  15284. + * Now just before cpu_down() enters stop machine, we need to make
  15285. + * sure all tasks that are in pinned CPU sections are out, and new
  15286. + * tasks will now grab the lock, keeping them from entering pinned
  15287. + * CPU sections.
  15288. + */
  15289. + if (!kthread_should_stop()) {
  15290. + preempt_disable();
  15291. + wait_for_pinned_cpus(hp);
  15292. + preempt_enable();
  15293. + complete(&hp->synced);
  15294. + }
  15295. +
  15296. + set_current_state(TASK_UNINTERRUPTIBLE);
  15297. + while (!kthread_should_stop()) {
  15298. + schedule();
  15299. + set_current_state(TASK_UNINTERRUPTIBLE);
  15300. + }
  15301. + set_current_state(TASK_RUNNING);
  15302. +
  15303. + /*
  15304. + * Force this thread off this CPU as it's going down and
  15305. + * we don't want any more work on this CPU.
  15306. + */
  15307. + current->flags &= ~PF_NO_SETAFFINITY;
  15308. + set_cpus_allowed_ptr(current, cpu_present_mask);
  15309. + migrate_me();
  15310. + return 0;
  15311. +}
  15312. +
  15313. +static void __cpu_unplug_sync(struct hotplug_pcp *hp)
  15314. +{
  15315. + wake_up_process(hp->sync_tsk);
  15316. + wait_for_completion(&hp->synced);
  15317. +}
  15318. +
  15319. +static void __cpu_unplug_wait(unsigned int cpu)
  15320. +{
  15321. + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
  15322. +
  15323. + complete(&hp->unplug_wait);
  15324. + wait_for_completion(&hp->synced);
  15325. +}
  15326. +
  15327. +/*
  15328. + * Start the sync_unplug_thread on the target cpu and wait for it to
  15329. + * complete.
  15330. + */
  15331. +static int cpu_unplug_begin(unsigned int cpu)
  15332. +{
  15333. + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
  15334. + int err;
  15335. +
  15336. + /* Protected by cpu_hotplug.lock */
  15337. + if (!hp->mutex_init) {
  15338. +#ifdef CONFIG_PREEMPT_RT_FULL
  15339. + spin_lock_init(&hp->lock);
  15340. +#else
  15341. + mutex_init(&hp->mutex);
  15342. +#endif
  15343. + hp->mutex_init = 1;
  15344. + }
  15345. +
  15346. + /* Inform the scheduler to migrate tasks off this CPU */
  15347. + tell_sched_cpu_down_begin(cpu);
  15348. +
  15349. + init_completion(&hp->synced);
  15350. + init_completion(&hp->unplug_wait);
  15351. +
  15352. + hp->sync_tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu);
  15353. + if (IS_ERR(hp->sync_tsk)) {
  15354. + err = PTR_ERR(hp->sync_tsk);
  15355. + hp->sync_tsk = NULL;
  15356. + return err;
  15357. + }
  15358. + kthread_bind(hp->sync_tsk, cpu);
  15359. +
  15360. + /*
  15361. + * Wait for tasks to get out of the pinned sections,
  15362. + * it's still OK if new tasks enter. Some CPU notifiers will
  15363. + * wait for tasks that are going to enter these sections and
  15364. + * we must not have them block.
  15365. + */
  15366. + wake_up_process(hp->sync_tsk);
  15367. + return 0;
  15368. +}
  15369. +
  15370. +static void cpu_unplug_sync(unsigned int cpu)
  15371. +{
  15372. + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
  15373. +
  15374. + init_completion(&hp->synced);
15375. + /* The completion needs to be initialized before setting grab_lock */
  15376. + smp_wmb();
  15377. +
  15378. + /* Grab the mutex before setting grab_lock */
  15379. + hotplug_lock(hp);
  15380. + hp->grab_lock = 1;
  15381. +
  15382. + /*
  15383. + * The CPU notifiers have been completed.
  15384. + * Wait for tasks to get out of pinned CPU sections and have new
  15385. + * tasks block until the CPU is completely down.
  15386. + */
  15387. + __cpu_unplug_sync(hp);
  15388. +
  15389. + /* All done with the sync thread */
  15390. + kthread_stop(hp->sync_tsk);
  15391. + hp->sync_tsk = NULL;
  15392. +}
  15393. +
  15394. +static void cpu_unplug_done(unsigned int cpu)
  15395. +{
  15396. + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
  15397. +
  15398. + hp->unplug = NULL;
  15399. + /* Let all tasks know cpu unplug is finished before cleaning up */
  15400. + smp_wmb();
  15401. +
  15402. + if (hp->sync_tsk)
  15403. + kthread_stop(hp->sync_tsk);
  15404. +
  15405. + if (hp->grab_lock) {
  15406. + hotplug_unlock(hp);
  15407. + /* protected by cpu_hotplug.lock */
  15408. + hp->grab_lock = 0;
  15409. + }
  15410. + tell_sched_cpu_down_done(cpu);
  15411. +}
  15412. void get_online_cpus(void)
  15413. {
  15414. @@ -338,13 +621,15 @@
  15415. /* Requires cpu_add_remove_lock to be held */
  15416. static int _cpu_down(unsigned int cpu, int tasks_frozen)
  15417. {
  15418. - int err, nr_calls = 0;
  15419. + int mycpu, err, nr_calls = 0;
  15420. void *hcpu = (void *)(long)cpu;
  15421. unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
  15422. struct take_cpu_down_param tcd_param = {
  15423. .mod = mod,
  15424. .hcpu = hcpu,
  15425. };
  15426. + cpumask_var_t cpumask;
  15427. + cpumask_var_t cpumask_org;
  15428. if (num_online_cpus() == 1)
  15429. return -EBUSY;
  15430. @@ -352,7 +637,34 @@
  15431. if (!cpu_online(cpu))
  15432. return -EINVAL;
  15433. + /* Move the downtaker off the unplug cpu */
  15434. + if (!alloc_cpumask_var(&cpumask, GFP_KERNEL))
  15435. + return -ENOMEM;
  15436. + if (!alloc_cpumask_var(&cpumask_org, GFP_KERNEL)) {
  15437. + free_cpumask_var(cpumask);
  15438. + return -ENOMEM;
  15439. + }
  15440. +
  15441. + cpumask_copy(cpumask_org, tsk_cpus_allowed(current));
  15442. + cpumask_andnot(cpumask, cpu_online_mask, cpumask_of(cpu));
  15443. + set_cpus_allowed_ptr(current, cpumask);
  15444. + free_cpumask_var(cpumask);
  15445. + migrate_disable();
  15446. + mycpu = smp_processor_id();
  15447. + if (mycpu == cpu) {
15448. + printk(KERN_ERR "Yuck! Still on unplug CPU!\n");
  15449. + migrate_enable();
  15450. + err = -EBUSY;
  15451. + goto restore_cpus;
  15452. + }
  15453. + migrate_enable();
  15454. +
  15455. cpu_hotplug_begin();
  15456. + err = cpu_unplug_begin(cpu);
  15457. + if (err) {
  15458. + printk("cpu_unplug_begin(%d) failed\n", cpu);
  15459. + goto out_cancel;
  15460. + }
  15461. err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
  15462. if (err) {
  15463. @@ -378,8 +690,12 @@
  15464. else
  15465. synchronize_rcu();
  15466. + __cpu_unplug_wait(cpu);
  15467. smpboot_park_threads(cpu);
  15468. + /* Notifiers are done. Don't let any more tasks pin this CPU. */
  15469. + cpu_unplug_sync(cpu);
  15470. +
  15471. /*
  15472. * Prevent irq alloc/free while the dying cpu reorganizes the
  15473. * interrupt affinities.
  15474. @@ -424,9 +740,14 @@
  15475. check_for_tasks(cpu);
  15476. out_release:
  15477. + cpu_unplug_done(cpu);
  15478. +out_cancel:
  15479. cpu_hotplug_done();
  15480. if (!err)
  15481. cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu);
  15482. +restore_cpus:
  15483. + set_cpus_allowed_ptr(current, cpumask_org);
  15484. + free_cpumask_var(cpumask_org);
  15485. return err;
  15486. }
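A rough usage sketch of the pin_current_cpu()/unpin_current_cpu() pair introduced above. In the -rt tree the real callers are migrate_disable()/migrate_enable(), as the comments in the hunk note; this only illustrates the calling convention and is not part of the patch:

	/* Keep the current CPU from being unplugged across a short per-CPU
	 * critical section.  pin_current_cpu() wants to be entered with
	 * preemption disabled; if an unplug is in progress it may re-enable
	 * preemption, block on the hotplug lock or migrate away, and retry. */
	static void example_pinned_section(void)
	{
		preempt_disable();
		pin_current_cpu();

		/* ... per-CPU work that must not race with cpu_down() ... */

		unpin_current_cpu();	/* last unpin wakes the sync_unplug thread */
		preempt_enable();
	}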
  15487. diff -Nur linux-4.4.62.orig/kernel/cpuset.c linux-4.4.62/kernel/cpuset.c
  15488. --- linux-4.4.62.orig/kernel/cpuset.c 2017-04-18 07:15:37.000000000 +0200
  15489. +++ linux-4.4.62/kernel/cpuset.c 2017-04-18 17:38:08.206649710 +0200
  15490. @@ -283,7 +283,7 @@
  15491. */
  15492. static DEFINE_MUTEX(cpuset_mutex);
  15493. -static DEFINE_SPINLOCK(callback_lock);
  15494. +static DEFINE_RAW_SPINLOCK(callback_lock);
  15495. static struct workqueue_struct *cpuset_migrate_mm_wq;
  15496. @@ -906,9 +906,9 @@
  15497. continue;
  15498. rcu_read_unlock();
  15499. - spin_lock_irq(&callback_lock);
  15500. + raw_spin_lock_irq(&callback_lock);
  15501. cpumask_copy(cp->effective_cpus, new_cpus);
  15502. - spin_unlock_irq(&callback_lock);
  15503. + raw_spin_unlock_irq(&callback_lock);
  15504. WARN_ON(!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
  15505. !cpumask_equal(cp->cpus_allowed, cp->effective_cpus));
  15506. @@ -973,9 +973,9 @@
  15507. if (retval < 0)
  15508. return retval;
  15509. - spin_lock_irq(&callback_lock);
  15510. + raw_spin_lock_irq(&callback_lock);
  15511. cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
  15512. - spin_unlock_irq(&callback_lock);
  15513. + raw_spin_unlock_irq(&callback_lock);
  15514. /* use trialcs->cpus_allowed as a temp variable */
  15515. update_cpumasks_hier(cs, trialcs->cpus_allowed);
  15516. @@ -1184,9 +1184,9 @@
  15517. continue;
  15518. rcu_read_unlock();
  15519. - spin_lock_irq(&callback_lock);
  15520. + raw_spin_lock_irq(&callback_lock);
  15521. cp->effective_mems = *new_mems;
  15522. - spin_unlock_irq(&callback_lock);
  15523. + raw_spin_unlock_irq(&callback_lock);
  15524. WARN_ON(!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
  15525. !nodes_equal(cp->mems_allowed, cp->effective_mems));
  15526. @@ -1254,9 +1254,9 @@
  15527. if (retval < 0)
  15528. goto done;
  15529. - spin_lock_irq(&callback_lock);
  15530. + raw_spin_lock_irq(&callback_lock);
  15531. cs->mems_allowed = trialcs->mems_allowed;
  15532. - spin_unlock_irq(&callback_lock);
  15533. + raw_spin_unlock_irq(&callback_lock);
  15534. /* use trialcs->mems_allowed as a temp variable */
  15535. update_nodemasks_hier(cs, &trialcs->mems_allowed);
  15536. @@ -1347,9 +1347,9 @@
  15537. spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs))
  15538. || (is_spread_page(cs) != is_spread_page(trialcs)));
  15539. - spin_lock_irq(&callback_lock);
  15540. + raw_spin_lock_irq(&callback_lock);
  15541. cs->flags = trialcs->flags;
  15542. - spin_unlock_irq(&callback_lock);
  15543. + raw_spin_unlock_irq(&callback_lock);
  15544. if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
  15545. rebuild_sched_domains_locked();
  15546. @@ -1761,7 +1761,7 @@
  15547. cpuset_filetype_t type = seq_cft(sf)->private;
  15548. int ret = 0;
  15549. - spin_lock_irq(&callback_lock);
  15550. + raw_spin_lock_irq(&callback_lock);
  15551. switch (type) {
  15552. case FILE_CPULIST:
  15553. @@ -1780,7 +1780,7 @@
  15554. ret = -EINVAL;
  15555. }
  15556. - spin_unlock_irq(&callback_lock);
  15557. + raw_spin_unlock_irq(&callback_lock);
  15558. return ret;
  15559. }
  15560. @@ -1994,12 +1994,12 @@
  15561. cpuset_inc();
  15562. - spin_lock_irq(&callback_lock);
  15563. + raw_spin_lock_irq(&callback_lock);
  15564. if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) {
  15565. cpumask_copy(cs->effective_cpus, parent->effective_cpus);
  15566. cs->effective_mems = parent->effective_mems;
  15567. }
  15568. - spin_unlock_irq(&callback_lock);
  15569. + raw_spin_unlock_irq(&callback_lock);
  15570. if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags))
  15571. goto out_unlock;
  15572. @@ -2026,12 +2026,12 @@
  15573. }
  15574. rcu_read_unlock();
  15575. - spin_lock_irq(&callback_lock);
  15576. + raw_spin_lock_irq(&callback_lock);
  15577. cs->mems_allowed = parent->mems_allowed;
  15578. cs->effective_mems = parent->mems_allowed;
  15579. cpumask_copy(cs->cpus_allowed, parent->cpus_allowed);
  15580. cpumask_copy(cs->effective_cpus, parent->cpus_allowed);
  15581. - spin_unlock_irq(&callback_lock);
  15582. + raw_spin_unlock_irq(&callback_lock);
  15583. out_unlock:
  15584. mutex_unlock(&cpuset_mutex);
  15585. return 0;
  15586. @@ -2070,7 +2070,7 @@
  15587. static void cpuset_bind(struct cgroup_subsys_state *root_css)
  15588. {
  15589. mutex_lock(&cpuset_mutex);
  15590. - spin_lock_irq(&callback_lock);
  15591. + raw_spin_lock_irq(&callback_lock);
  15592. if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) {
  15593. cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask);
  15594. @@ -2081,7 +2081,7 @@
  15595. top_cpuset.mems_allowed = top_cpuset.effective_mems;
  15596. }
  15597. - spin_unlock_irq(&callback_lock);
  15598. + raw_spin_unlock_irq(&callback_lock);
  15599. mutex_unlock(&cpuset_mutex);
  15600. }
  15601. @@ -2182,12 +2182,12 @@
  15602. {
  15603. bool is_empty;
  15604. - spin_lock_irq(&callback_lock);
  15605. + raw_spin_lock_irq(&callback_lock);
  15606. cpumask_copy(cs->cpus_allowed, new_cpus);
  15607. cpumask_copy(cs->effective_cpus, new_cpus);
  15608. cs->mems_allowed = *new_mems;
  15609. cs->effective_mems = *new_mems;
  15610. - spin_unlock_irq(&callback_lock);
  15611. + raw_spin_unlock_irq(&callback_lock);
  15612. /*
  15613. * Don't call update_tasks_cpumask() if the cpuset becomes empty,
  15614. @@ -2224,10 +2224,10 @@
  15615. if (nodes_empty(*new_mems))
  15616. *new_mems = parent_cs(cs)->effective_mems;
  15617. - spin_lock_irq(&callback_lock);
  15618. + raw_spin_lock_irq(&callback_lock);
  15619. cpumask_copy(cs->effective_cpus, new_cpus);
  15620. cs->effective_mems = *new_mems;
  15621. - spin_unlock_irq(&callback_lock);
  15622. + raw_spin_unlock_irq(&callback_lock);
  15623. if (cpus_updated)
  15624. update_tasks_cpumask(cs);
  15625. @@ -2313,21 +2313,21 @@
  15626. /* synchronize cpus_allowed to cpu_active_mask */
  15627. if (cpus_updated) {
  15628. - spin_lock_irq(&callback_lock);
  15629. + raw_spin_lock_irq(&callback_lock);
  15630. if (!on_dfl)
  15631. cpumask_copy(top_cpuset.cpus_allowed, &new_cpus);
  15632. cpumask_copy(top_cpuset.effective_cpus, &new_cpus);
  15633. - spin_unlock_irq(&callback_lock);
  15634. + raw_spin_unlock_irq(&callback_lock);
  15635. /* we don't mess with cpumasks of tasks in top_cpuset */
  15636. }
  15637. /* synchronize mems_allowed to N_MEMORY */
  15638. if (mems_updated) {
  15639. - spin_lock_irq(&callback_lock);
  15640. + raw_spin_lock_irq(&callback_lock);
  15641. if (!on_dfl)
  15642. top_cpuset.mems_allowed = new_mems;
  15643. top_cpuset.effective_mems = new_mems;
  15644. - spin_unlock_irq(&callback_lock);
  15645. + raw_spin_unlock_irq(&callback_lock);
  15646. update_tasks_nodemask(&top_cpuset);
  15647. }
  15648. @@ -2425,11 +2425,11 @@
  15649. {
  15650. unsigned long flags;
  15651. - spin_lock_irqsave(&callback_lock, flags);
  15652. + raw_spin_lock_irqsave(&callback_lock, flags);
  15653. rcu_read_lock();
  15654. guarantee_online_cpus(task_cs(tsk), pmask);
  15655. rcu_read_unlock();
  15656. - spin_unlock_irqrestore(&callback_lock, flags);
  15657. + raw_spin_unlock_irqrestore(&callback_lock, flags);
  15658. }
  15659. void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
  15660. @@ -2477,11 +2477,11 @@
  15661. nodemask_t mask;
  15662. unsigned long flags;
  15663. - spin_lock_irqsave(&callback_lock, flags);
  15664. + raw_spin_lock_irqsave(&callback_lock, flags);
  15665. rcu_read_lock();
  15666. guarantee_online_mems(task_cs(tsk), &mask);
  15667. rcu_read_unlock();
  15668. - spin_unlock_irqrestore(&callback_lock, flags);
  15669. + raw_spin_unlock_irqrestore(&callback_lock, flags);
  15670. return mask;
  15671. }
  15672. @@ -2573,14 +2573,14 @@
  15673. return 1;
  15674. /* Not hardwall and node outside mems_allowed: scan up cpusets */
  15675. - spin_lock_irqsave(&callback_lock, flags);
  15676. + raw_spin_lock_irqsave(&callback_lock, flags);
  15677. rcu_read_lock();
  15678. cs = nearest_hardwall_ancestor(task_cs(current));
  15679. allowed = node_isset(node, cs->mems_allowed);
  15680. rcu_read_unlock();
  15681. - spin_unlock_irqrestore(&callback_lock, flags);
  15682. + raw_spin_unlock_irqrestore(&callback_lock, flags);
  15683. return allowed;
  15684. }
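The cpuset changes are a pure lock-type conversion: on PREEMPT_RT_FULL a spinlock_t turns into a sleeping, rt_mutex based lock, while raw_spinlock_t keeps the non-sleeping, preemption-disabling behaviour. Presumably because callback_lock is needed from contexts that remain atomic on RT, the lock and every one of its lock/unlock sites are switched to the raw variant; no other logic changes.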
  15685. diff -Nur linux-4.4.62.orig/kernel/debug/kdb/kdb_io.c linux-4.4.62/kernel/debug/kdb/kdb_io.c
  15686. --- linux-4.4.62.orig/kernel/debug/kdb/kdb_io.c 2017-04-18 07:15:37.000000000 +0200
  15687. +++ linux-4.4.62/kernel/debug/kdb/kdb_io.c 2017-04-18 17:38:08.206649710 +0200
  15688. @@ -554,7 +554,6 @@
  15689. int linecount;
  15690. int colcount;
  15691. int logging, saved_loglevel = 0;
  15692. - int saved_trap_printk;
  15693. int got_printf_lock = 0;
  15694. int retlen = 0;
  15695. int fnd, len;
  15696. @@ -565,8 +564,6 @@
  15697. unsigned long uninitialized_var(flags);
  15698. preempt_disable();
  15699. - saved_trap_printk = kdb_trap_printk;
  15700. - kdb_trap_printk = 0;
  15701. /* Serialize kdb_printf if multiple cpus try to write at once.
  15702. * But if any cpu goes recursive in kdb, just print the output,
  15703. @@ -855,7 +852,6 @@
  15704. } else {
  15705. __release(kdb_printf_lock);
  15706. }
  15707. - kdb_trap_printk = saved_trap_printk;
  15708. preempt_enable();
  15709. return retlen;
  15710. }
  15711. @@ -865,9 +861,11 @@
  15712. va_list ap;
  15713. int r;
  15714. + kdb_trap_printk++;
  15715. va_start(ap, fmt);
  15716. r = vkdb_printf(KDB_MSGSRC_INTERNAL, fmt, ap);
  15717. va_end(ap);
  15718. + kdb_trap_printk--;
  15719. return r;
  15720. }
  15721. diff -Nur linux-4.4.62.orig/kernel/events/core.c linux-4.4.62/kernel/events/core.c
  15722. --- linux-4.4.62.orig/kernel/events/core.c 2017-04-18 07:15:37.000000000 +0200
  15723. +++ linux-4.4.62/kernel/events/core.c 2017-04-18 17:38:08.206649710 +0200
  15724. @@ -802,6 +802,7 @@
  15725. raw_spin_lock_init(&cpuctx->hrtimer_lock);
  15726. hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
  15727. timer->function = perf_mux_hrtimer_handler;
  15728. + timer->irqsafe = 1;
  15729. }
  15730. static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx)
  15731. @@ -7240,6 +7241,7 @@
  15732. hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  15733. hwc->hrtimer.function = perf_swevent_hrtimer;
  15734. + hwc->hrtimer.irqsafe = 1;
  15735. /*
  15736. * Since hrtimers have a fixed rate, we can do a static freq->period
  15737. diff -Nur linux-4.4.62.orig/kernel/exit.c linux-4.4.62/kernel/exit.c
  15738. --- linux-4.4.62.orig/kernel/exit.c 2017-04-18 07:15:37.000000000 +0200
  15739. +++ linux-4.4.62/kernel/exit.c 2017-04-18 17:38:08.206649710 +0200
  15740. @@ -144,7 +144,7 @@
  15741. * Do this under ->siglock, we can race with another thread
  15742. * doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals.
  15743. */
  15744. - flush_sigqueue(&tsk->pending);
  15745. + flush_task_sigqueue(tsk);
  15746. tsk->sighand = NULL;
  15747. spin_unlock(&sighand->siglock);
  15748. diff -Nur linux-4.4.62.orig/kernel/fork.c linux-4.4.62/kernel/fork.c
  15749. --- linux-4.4.62.orig/kernel/fork.c 2017-04-18 07:15:37.000000000 +0200
  15750. +++ linux-4.4.62/kernel/fork.c 2017-04-18 17:38:08.206649710 +0200
  15751. @@ -108,7 +108,7 @@
  15752. DEFINE_PER_CPU(unsigned long, process_counts) = 0;
  15753. -__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */
  15754. +DEFINE_RWLOCK(tasklist_lock); /* outer */
  15755. #ifdef CONFIG_PROVE_RCU
  15756. int lockdep_tasklist_lock_is_held(void)
  15757. @@ -244,7 +244,9 @@
  15758. if (atomic_dec_and_test(&sig->sigcnt))
  15759. free_signal_struct(sig);
  15760. }
  15761. -
  15762. +#ifdef CONFIG_PREEMPT_RT_BASE
  15763. +static
  15764. +#endif
  15765. void __put_task_struct(struct task_struct *tsk)
  15766. {
  15767. WARN_ON(!tsk->exit_state);
  15768. @@ -261,7 +263,18 @@
  15769. if (!profile_handoff_task(tsk))
  15770. free_task(tsk);
  15771. }
  15772. +#ifndef CONFIG_PREEMPT_RT_BASE
  15773. EXPORT_SYMBOL_GPL(__put_task_struct);
  15774. +#else
  15775. +void __put_task_struct_cb(struct rcu_head *rhp)
  15776. +{
  15777. + struct task_struct *tsk = container_of(rhp, struct task_struct, put_rcu);
  15778. +
  15779. + __put_task_struct(tsk);
  15780. +
  15781. +}
  15782. +EXPORT_SYMBOL_GPL(__put_task_struct_cb);
  15783. +#endif
  15784. void __init __weak arch_task_cache_init(void) { }
  15785. @@ -693,6 +706,19 @@
  15786. }
  15787. EXPORT_SYMBOL_GPL(__mmdrop);
  15788. +#ifdef CONFIG_PREEMPT_RT_BASE
  15789. +/*
  15790. + * RCU callback for delayed mm drop. Not strictly rcu, but we don't
  15791. + * want another facility to make this work.
  15792. + */
  15793. +void __mmdrop_delayed(struct rcu_head *rhp)
  15794. +{
  15795. + struct mm_struct *mm = container_of(rhp, struct mm_struct, delayed_drop);
  15796. +
  15797. + __mmdrop(mm);
  15798. +}
  15799. +#endif
  15800. +
  15801. /*
  15802. * Decrement the use count and release all resources for an mm.
  15803. */
  15804. @@ -1243,6 +1269,9 @@
  15805. */
  15806. static void posix_cpu_timers_init(struct task_struct *tsk)
  15807. {
  15808. +#ifdef CONFIG_PREEMPT_RT_BASE
  15809. + tsk->posix_timer_list = NULL;
  15810. +#endif
  15811. tsk->cputime_expires.prof_exp = 0;
  15812. tsk->cputime_expires.virt_exp = 0;
  15813. tsk->cputime_expires.sched_exp = 0;
  15814. @@ -1369,15 +1398,16 @@
  15815. spin_lock_init(&p->alloc_lock);
  15816. init_sigpending(&p->pending);
  15817. + p->sigqueue_cache = NULL;
  15818. p->utime = p->stime = p->gtime = 0;
  15819. p->utimescaled = p->stimescaled = 0;
  15820. prev_cputime_init(&p->prev_cputime);
  15821. #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
  15822. - seqlock_init(&p->vtime_seqlock);
  15823. + seqcount_init(&p->vtime_seqcount);
  15824. p->vtime_snap = 0;
  15825. - p->vtime_snap_whence = VTIME_SLEEPING;
  15826. + p->vtime_snap_whence = VTIME_INACTIVE;
  15827. #endif
  15828. #if defined(SPLIT_RSS_COUNTING)
  15829. diff -Nur linux-4.4.62.orig/kernel/futex.c linux-4.4.62/kernel/futex.c
  15830. --- linux-4.4.62.orig/kernel/futex.c 2017-04-18 07:15:37.000000000 +0200
  15831. +++ linux-4.4.62/kernel/futex.c 2017-04-18 17:38:08.206649710 +0200
  15832. @@ -815,7 +815,9 @@
  15833. * task still owns the PI-state:
  15834. */
  15835. if (head->next != next) {
  15836. + raw_spin_unlock_irq(&curr->pi_lock);
  15837. spin_unlock(&hb->lock);
  15838. + raw_spin_lock_irq(&curr->pi_lock);
  15839. continue;
  15840. }
  15841. @@ -1210,6 +1212,7 @@
  15842. struct futex_pi_state *pi_state = this->pi_state;
  15843. u32 uninitialized_var(curval), newval;
  15844. WAKE_Q(wake_q);
  15845. + WAKE_Q(wake_sleeper_q);
  15846. bool deboost;
  15847. int ret = 0;
  15848. @@ -1223,7 +1226,7 @@
  15849. if (pi_state->owner != current)
  15850. return -EINVAL;
  15851. - raw_spin_lock(&pi_state->pi_mutex.wait_lock);
  15852. + raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
  15853. new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
  15854. /*
  15855. @@ -1259,24 +1262,25 @@
  15856. ret = -EINVAL;
  15857. }
  15858. if (ret) {
  15859. - raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
  15860. + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
  15861. return ret;
  15862. }
  15863. - raw_spin_lock_irq(&pi_state->owner->pi_lock);
  15864. + raw_spin_lock(&pi_state->owner->pi_lock);
  15865. WARN_ON(list_empty(&pi_state->list));
  15866. list_del_init(&pi_state->list);
  15867. - raw_spin_unlock_irq(&pi_state->owner->pi_lock);
  15868. + raw_spin_unlock(&pi_state->owner->pi_lock);
  15869. - raw_spin_lock_irq(&new_owner->pi_lock);
  15870. + raw_spin_lock(&new_owner->pi_lock);
  15871. WARN_ON(!list_empty(&pi_state->list));
  15872. list_add(&pi_state->list, &new_owner->pi_state_list);
  15873. pi_state->owner = new_owner;
  15874. - raw_spin_unlock_irq(&new_owner->pi_lock);
  15875. + raw_spin_unlock(&new_owner->pi_lock);
  15876. - raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
  15877. + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
  15878. - deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
  15879. + deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q,
  15880. + &wake_sleeper_q);
  15881. /*
  15882. * First unlock HB so the waiter does not spin on it once he got woken
  15883. @@ -1284,8 +1288,9 @@
  15884. * deboost first (and lose our higher priority), then the task might get
  15885. * scheduled away before the wake up can take place.
  15886. */
  15887. - spin_unlock(&hb->lock);
  15888. + deboost |= spin_unlock_no_deboost(&hb->lock);
  15889. wake_up_q(&wake_q);
  15890. + wake_up_q_sleeper(&wake_sleeper_q);
  15891. if (deboost)
  15892. rt_mutex_adjust_prio(current);
  15893. @@ -1822,6 +1827,16 @@
  15894. requeue_pi_wake_futex(this, &key2, hb2);
  15895. drop_count++;
  15896. continue;
  15897. + } else if (ret == -EAGAIN) {
  15898. + /*
  15899. + * Waiter was woken by timeout or
  15900. + * signal and has set pi_blocked_on to
  15901. + * PI_WAKEUP_INPROGRESS before we
  15902. + * tried to enqueue it on the rtmutex.
  15903. + */
  15904. + this->pi_state = NULL;
  15905. + free_pi_state(pi_state);
  15906. + continue;
  15907. } else if (ret) {
  15908. /* -EDEADLK */
  15909. this->pi_state = NULL;
  15910. @@ -2139,11 +2154,11 @@
  15911. * we returned due to timeout or signal without taking the
  15912. * rt_mutex. Too late.
  15913. */
  15914. - raw_spin_lock(&q->pi_state->pi_mutex.wait_lock);
  15915. + raw_spin_lock_irq(&q->pi_state->pi_mutex.wait_lock);
  15916. owner = rt_mutex_owner(&q->pi_state->pi_mutex);
  15917. if (!owner)
  15918. owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
  15919. - raw_spin_unlock(&q->pi_state->pi_mutex.wait_lock);
  15920. + raw_spin_unlock_irq(&q->pi_state->pi_mutex.wait_lock);
  15921. ret = fixup_pi_state_owner(uaddr, q, owner);
  15922. goto out;
  15923. }
  15924. @@ -2690,7 +2705,7 @@
  15925. {
  15926. struct hrtimer_sleeper timeout, *to = NULL;
  15927. struct rt_mutex_waiter rt_waiter;
  15928. - struct futex_hash_bucket *hb;
  15929. + struct futex_hash_bucket *hb, *hb2;
  15930. union futex_key key2 = FUTEX_KEY_INIT;
  15931. struct futex_q q = futex_q_init;
  15932. int res, ret;
  15933. @@ -2715,10 +2730,7 @@
  15934. * The waiter is allocated on our stack, manipulated by the requeue
  15935. * code while we sleep on uaddr.
  15936. */
  15937. - debug_rt_mutex_init_waiter(&rt_waiter);
  15938. - RB_CLEAR_NODE(&rt_waiter.pi_tree_entry);
  15939. - RB_CLEAR_NODE(&rt_waiter.tree_entry);
  15940. - rt_waiter.task = NULL;
  15941. + rt_mutex_init_waiter(&rt_waiter, false);
  15942. ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
  15943. if (unlikely(ret != 0))
  15944. @@ -2749,20 +2761,55 @@
  15945. /* Queue the futex_q, drop the hb lock, wait for wakeup. */
  15946. futex_wait_queue_me(hb, &q, to);
  15947. - spin_lock(&hb->lock);
  15948. - ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
  15949. - spin_unlock(&hb->lock);
  15950. - if (ret)
  15951. - goto out_put_keys;
  15952. + /*
  15953. + * On RT we must avoid races with requeue and trying to block
  15954. + * on two mutexes (hb->lock and uaddr2's rtmutex) by
  15955. + * serializing access to pi_blocked_on with pi_lock.
  15956. + */
  15957. + raw_spin_lock_irq(&current->pi_lock);
  15958. + if (current->pi_blocked_on) {
  15959. + /*
  15960. + * We have been requeued or are in the process of
  15961. + * being requeued.
  15962. + */
  15963. + raw_spin_unlock_irq(&current->pi_lock);
  15964. + } else {
  15965. + /*
  15966. + * Setting pi_blocked_on to PI_WAKEUP_INPROGRESS
  15967. + * prevents a concurrent requeue from moving us to the
  15968. + * uaddr2 rtmutex. After that we can safely acquire
  15969. + * (and possibly block on) hb->lock.
  15970. + */
  15971. + current->pi_blocked_on = PI_WAKEUP_INPROGRESS;
  15972. + raw_spin_unlock_irq(&current->pi_lock);
  15973. +
  15974. + spin_lock(&hb->lock);
  15975. +
  15976. + /*
  15977. + * Clean up pi_blocked_on. We might leak it otherwise
  15978. + * when we succeeded with the hb->lock in the fast
  15979. + * path.
  15980. + */
  15981. + raw_spin_lock_irq(&current->pi_lock);
  15982. + current->pi_blocked_on = NULL;
  15983. + raw_spin_unlock_irq(&current->pi_lock);
  15984. +
  15985. + ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
  15986. + spin_unlock(&hb->lock);
  15987. + if (ret)
  15988. + goto out_put_keys;
  15989. + }
  15990. /*
  15991. - * In order for us to be here, we know our q.key == key2, and since
  15992. - * we took the hb->lock above, we also know that futex_requeue() has
  15993. - * completed and we no longer have to concern ourselves with a wakeup
  15994. - * race with the atomic proxy lock acquisition by the requeue code. The
  15995. - * futex_requeue dropped our key1 reference and incremented our key2
  15996. - * reference count.
  15997. + * In order to be here, we have either been requeued, are in
  15998. + * the process of being requeued, or requeue successfully
  15999. + * acquired uaddr2 on our behalf. If pi_blocked_on was
  16000. + * non-null above, we may be racing with a requeue. Do not
  16001. + * rely on q->lock_ptr to be hb2->lock until after blocking on
  16002. + * hb->lock or hb2->lock. The futex_requeue dropped our key1
  16003. + * reference and incremented our key2 reference count.
  16004. */
  16005. + hb2 = hash_futex(&key2);
  16006. /* Check if the requeue code acquired the second futex for us. */
  16007. if (!q.rt_waiter) {
  16008. @@ -2771,7 +2818,8 @@
  16009. * did a lock-steal - fix up the PI-state in that case.
  16010. */
  16011. if (q.pi_state && (q.pi_state->owner != current)) {
  16012. - spin_lock(q.lock_ptr);
  16013. + spin_lock(&hb2->lock);
  16014. + BUG_ON(&hb2->lock != q.lock_ptr);
  16015. ret = fixup_pi_state_owner(uaddr2, &q, current);
  16016. if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current)
  16017. rt_mutex_unlock(&q.pi_state->pi_mutex);
  16018. @@ -2780,7 +2828,7 @@
  16019. * the requeue_pi() code acquired for us.
  16020. */
  16021. free_pi_state(q.pi_state);
  16022. - spin_unlock(q.lock_ptr);
  16023. + spin_unlock(&hb2->lock);
  16024. }
  16025. } else {
  16026. struct rt_mutex *pi_mutex;
  16027. @@ -2795,7 +2843,8 @@
  16028. ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter);
  16029. debug_rt_mutex_free_waiter(&rt_waiter);
  16030. - spin_lock(q.lock_ptr);
  16031. + spin_lock(&hb2->lock);
  16032. + BUG_ON(&hb2->lock != q.lock_ptr);
  16033. /*
  16034. * Fixup the pi_state owner and possibly acquire the lock if we
  16035. * haven't already.
  16036. diff -Nur linux-4.4.62.orig/kernel/irq/handle.c linux-4.4.62/kernel/irq/handle.c
  16037. --- linux-4.4.62.orig/kernel/irq/handle.c 2017-04-18 07:15:37.000000000 +0200
  16038. +++ linux-4.4.62/kernel/irq/handle.c 2017-04-18 17:38:08.206649710 +0200
  16039. @@ -134,6 +134,8 @@
  16040. irqreturn_t handle_irq_event_percpu(struct irq_desc *desc)
  16041. {
  16042. + struct pt_regs *regs = get_irq_regs();
  16043. + u64 ip = regs ? instruction_pointer(regs) : 0;
  16044. irqreturn_t retval = IRQ_NONE;
  16045. unsigned int flags = 0, irq = desc->irq_data.irq;
  16046. struct irqaction *action = desc->action;
  16047. @@ -176,7 +178,11 @@
  16048. action = action->next;
  16049. }
  16050. - add_interrupt_randomness(irq, flags);
  16051. +#ifdef CONFIG_PREEMPT_RT_FULL
  16052. + desc->random_ip = ip;
  16053. +#else
  16054. + add_interrupt_randomness(irq, flags, ip);
  16055. +#endif
  16056. if (!noirqdebug)
  16057. note_interrupt(desc, retval);
  16058. diff -Nur linux-4.4.62.orig/kernel/irq/irqdesc.c linux-4.4.62/kernel/irq/irqdesc.c
  16059. --- linux-4.4.62.orig/kernel/irq/irqdesc.c 2017-04-18 07:15:37.000000000 +0200
  16060. +++ linux-4.4.62/kernel/irq/irqdesc.c 2017-04-18 17:38:08.206649710 +0200
  16061. @@ -24,10 +24,27 @@
  16062. static struct lock_class_key irq_desc_lock_class;
  16063. #if defined(CONFIG_SMP)
  16064. +static int __init irq_affinity_setup(char *str)
  16065. +{
  16066. + zalloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
  16067. + cpulist_parse(str, irq_default_affinity);
  16068. + /*
16069. + * Set at least the boot CPU. We don't want to end up with
16070. + * bug reports caused by random command-line masks
  16071. + */
  16072. + cpumask_set_cpu(smp_processor_id(), irq_default_affinity);
  16073. + return 1;
  16074. +}
  16075. +__setup("irqaffinity=", irq_affinity_setup);
  16076. +
  16077. static void __init init_irq_default_affinity(void)
  16078. {
  16079. - alloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
  16080. - cpumask_setall(irq_default_affinity);
  16081. +#ifdef CONFIG_CPUMASK_OFFSTACK
  16082. + if (!irq_default_affinity)
  16083. + zalloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
  16084. +#endif
  16085. + if (cpumask_empty(irq_default_affinity))
  16086. + cpumask_setall(irq_default_affinity);
  16087. }
  16088. #else
  16089. static void __init init_irq_default_affinity(void)
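The new "irqaffinity=" early parameter accepts a standard cpulist; booting with, for example, irqaffinity=0-3 restricts the default affinity mask used for newly set up interrupts to CPUs 0-3. The setup routine above always adds the boot CPU back into the mask, so a bogus command-line value cannot leave the system with an empty default affinity.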
  16090. diff -Nur linux-4.4.62.orig/kernel/irq/manage.c linux-4.4.62/kernel/irq/manage.c
  16091. --- linux-4.4.62.orig/kernel/irq/manage.c 2017-04-18 07:15:37.000000000 +0200
  16092. +++ linux-4.4.62/kernel/irq/manage.c 2017-04-18 17:38:08.206649710 +0200
  16093. @@ -22,6 +22,7 @@
  16094. #include "internals.h"
  16095. #ifdef CONFIG_IRQ_FORCED_THREADING
  16096. +# ifndef CONFIG_PREEMPT_RT_BASE
  16097. __read_mostly bool force_irqthreads;
  16098. static int __init setup_forced_irqthreads(char *arg)
  16099. @@ -30,6 +31,7 @@
  16100. return 0;
  16101. }
  16102. early_param("threadirqs", setup_forced_irqthreads);
  16103. +# endif
  16104. #endif
  16105. static void __synchronize_hardirq(struct irq_desc *desc)
  16106. @@ -181,6 +183,62 @@
  16107. irq_get_pending(struct cpumask *mask, struct irq_desc *desc) { }
  16108. #endif
  16109. +#ifdef CONFIG_PREEMPT_RT_FULL
  16110. +static void _irq_affinity_notify(struct irq_affinity_notify *notify);
  16111. +static struct task_struct *set_affinity_helper;
  16112. +static LIST_HEAD(affinity_list);
  16113. +static DEFINE_RAW_SPINLOCK(affinity_list_lock);
  16114. +
  16115. +static int set_affinity_thread(void *unused)
  16116. +{
  16117. + while (1) {
  16118. + struct irq_affinity_notify *notify;
  16119. + int empty;
  16120. +
  16121. + set_current_state(TASK_INTERRUPTIBLE);
  16122. +
  16123. + raw_spin_lock_irq(&affinity_list_lock);
  16124. + empty = list_empty(&affinity_list);
  16125. + raw_spin_unlock_irq(&affinity_list_lock);
  16126. +
  16127. + if (empty)
  16128. + schedule();
  16129. + if (kthread_should_stop())
  16130. + break;
  16131. + set_current_state(TASK_RUNNING);
  16132. +try_next:
  16133. + notify = NULL;
  16134. +
  16135. + raw_spin_lock_irq(&affinity_list_lock);
  16136. + if (!list_empty(&affinity_list)) {
  16137. + notify = list_first_entry(&affinity_list,
  16138. + struct irq_affinity_notify, list);
  16139. + list_del_init(&notify->list);
  16140. + }
  16141. + raw_spin_unlock_irq(&affinity_list_lock);
  16142. +
  16143. + if (!notify)
  16144. + continue;
  16145. + _irq_affinity_notify(notify);
  16146. + goto try_next;
  16147. + }
  16148. + return 0;
  16149. +}
  16150. +
  16151. +static void init_helper_thread(void)
  16152. +{
  16153. + if (set_affinity_helper)
  16154. + return;
  16155. + set_affinity_helper = kthread_run(set_affinity_thread, NULL,
  16156. + "affinity-cb");
  16157. + WARN_ON(IS_ERR(set_affinity_helper));
  16158. +}
  16159. +#else
  16160. +
  16161. +static inline void init_helper_thread(void) { }
  16162. +
  16163. +#endif
  16164. +
  16165. int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
  16166. bool force)
  16167. {
  16168. @@ -220,7 +278,17 @@
  16169. if (desc->affinity_notify) {
  16170. kref_get(&desc->affinity_notify->kref);
  16171. +
  16172. +#ifdef CONFIG_PREEMPT_RT_FULL
  16173. + raw_spin_lock(&affinity_list_lock);
  16174. + if (list_empty(&desc->affinity_notify->list))
  16175. + list_add_tail(&affinity_list,
  16176. + &desc->affinity_notify->list);
  16177. + raw_spin_unlock(&affinity_list_lock);
  16178. + wake_up_process(set_affinity_helper);
  16179. +#else
  16180. schedule_work(&desc->affinity_notify->work);
  16181. +#endif
  16182. }
  16183. irqd_set(data, IRQD_AFFINITY_SET);
  16184. @@ -258,10 +326,8 @@
  16185. }
  16186. EXPORT_SYMBOL_GPL(irq_set_affinity_hint);
  16187. -static void irq_affinity_notify(struct work_struct *work)
  16188. +static void _irq_affinity_notify(struct irq_affinity_notify *notify)
  16189. {
  16190. - struct irq_affinity_notify *notify =
  16191. - container_of(work, struct irq_affinity_notify, work);
  16192. struct irq_desc *desc = irq_to_desc(notify->irq);
  16193. cpumask_var_t cpumask;
  16194. unsigned long flags;
  16195. @@ -283,6 +349,13 @@
  16196. kref_put(&notify->kref, notify->release);
  16197. }
  16198. +static void irq_affinity_notify(struct work_struct *work)
  16199. +{
  16200. + struct irq_affinity_notify *notify =
  16201. + container_of(work, struct irq_affinity_notify, work);
  16202. + _irq_affinity_notify(notify);
  16203. +}
  16204. +
  16205. /**
  16206. * irq_set_affinity_notifier - control notification of IRQ affinity changes
  16207. * @irq: Interrupt for which to enable/disable notification
  16208. @@ -312,6 +385,8 @@
  16209. notify->irq = irq;
  16210. kref_init(&notify->kref);
  16211. INIT_WORK(&notify->work, irq_affinity_notify);
  16212. + INIT_LIST_HEAD(&notify->list);
  16213. + init_helper_thread();
  16214. }
  16215. raw_spin_lock_irqsave(&desc->lock, flags);
  16216. @@ -865,7 +940,15 @@
  16217. local_bh_disable();
  16218. ret = action->thread_fn(action->irq, action->dev_id);
  16219. irq_finalize_oneshot(desc, action);
  16220. - local_bh_enable();
  16221. + /*
  16222. + * Interrupts which have real time requirements can be set up
  16223. + * to avoid softirq processing in the thread handler. This is
  16224. + * safe as these interrupts do not raise soft interrupts.
  16225. + */
  16226. + if (irq_settings_no_softirq_call(desc))
  16227. + _local_bh_enable();
  16228. + else
  16229. + local_bh_enable();
  16230. return ret;
  16231. }
  16232. @@ -962,6 +1045,12 @@
  16233. if (action_ret == IRQ_WAKE_THREAD)
  16234. irq_wake_secondary(desc, action);
  16235. +#ifdef CONFIG_PREEMPT_RT_FULL
  16236. + migrate_disable();
  16237. + add_interrupt_randomness(action->irq, 0,
  16238. + desc->random_ip ^ (unsigned long) action);
  16239. + migrate_enable();
  16240. +#endif
  16241. wake_threads_waitq(desc);
  16242. }
  16243. @@ -1315,6 +1404,9 @@
  16244. irqd_set(&desc->irq_data, IRQD_NO_BALANCING);
  16245. }
  16246. + if (new->flags & IRQF_NO_SOFTIRQ_CALL)
  16247. + irq_settings_set_no_softirq_call(desc);
  16248. +
  16249. /* Set default affinity mask once everything is setup */
  16250. setup_affinity(desc, mask);
  16251. @@ -1968,7 +2060,7 @@
  16252. * This call sets the internal irqchip state of an interrupt,
  16253. * depending on the value of @which.
  16254. *
  16255. - * This function should be called with preemption disabled if the
  16256. + * This function should be called with migration disabled if the
  16257. * interrupt controller has per-cpu registers.
  16258. */
  16259. int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which,
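
Illustration (not part of the patch): a driver with hard latency requirements could request its interrupt with the IRQF_NO_SOFTIRQ_CALL flag handled above, so that the force-threaded handler returns via _local_bh_enable() and skips softirq processing. The device name, IRQ number and handler below are hypothetical:

static irqreturn_t my_dev_handler(int irq, void *dev_id)
{
	/*
	 * On an RT kernel this runs in the force-threaded handler.
	 * Because the interrupt was requested with
	 * IRQF_NO_SOFTIRQ_CALL, the thread will not process pending
	 * softirqs on return, so this handler must not raise any.
	 */
	return IRQ_HANDLED;
}

static int my_dev_setup_irq(unsigned int irq, void *dev_id)
{
	return request_irq(irq, my_dev_handler, IRQF_NO_SOFTIRQ_CALL,
			   "my-dev", dev_id);
}
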
  16260. diff -Nur linux-4.4.62.orig/kernel/irq/settings.h linux-4.4.62/kernel/irq/settings.h
  16261. --- linux-4.4.62.orig/kernel/irq/settings.h 2017-04-18 07:15:37.000000000 +0200
  16262. +++ linux-4.4.62/kernel/irq/settings.h 2017-04-18 17:38:08.206649710 +0200
  16263. @@ -16,6 +16,7 @@
  16264. _IRQ_PER_CPU_DEVID = IRQ_PER_CPU_DEVID,
  16265. _IRQ_IS_POLLED = IRQ_IS_POLLED,
  16266. _IRQ_DISABLE_UNLAZY = IRQ_DISABLE_UNLAZY,
  16267. + _IRQ_NO_SOFTIRQ_CALL = IRQ_NO_SOFTIRQ_CALL,
  16268. _IRQF_MODIFY_MASK = IRQF_MODIFY_MASK,
  16269. };
  16270. @@ -30,6 +31,7 @@
  16271. #define IRQ_PER_CPU_DEVID GOT_YOU_MORON
  16272. #define IRQ_IS_POLLED GOT_YOU_MORON
  16273. #define IRQ_DISABLE_UNLAZY GOT_YOU_MORON
  16274. +#define IRQ_NO_SOFTIRQ_CALL GOT_YOU_MORON
  16275. #undef IRQF_MODIFY_MASK
  16276. #define IRQF_MODIFY_MASK GOT_YOU_MORON
  16277. @@ -40,6 +42,16 @@
  16278. desc->status_use_accessors |= (set & _IRQF_MODIFY_MASK);
  16279. }
  16280. +static inline bool irq_settings_no_softirq_call(struct irq_desc *desc)
  16281. +{
  16282. + return desc->status_use_accessors & _IRQ_NO_SOFTIRQ_CALL;
  16283. +}
  16284. +
  16285. +static inline void irq_settings_set_no_softirq_call(struct irq_desc *desc)
  16286. +{
  16287. + desc->status_use_accessors |= _IRQ_NO_SOFTIRQ_CALL;
  16288. +}
  16289. +
  16290. static inline bool irq_settings_is_per_cpu(struct irq_desc *desc)
  16291. {
  16292. return desc->status_use_accessors & _IRQ_PER_CPU;
  16293. diff -Nur linux-4.4.62.orig/kernel/irq/spurious.c linux-4.4.62/kernel/irq/spurious.c
  16294. --- linux-4.4.62.orig/kernel/irq/spurious.c 2017-04-18 07:15:37.000000000 +0200
  16295. +++ linux-4.4.62/kernel/irq/spurious.c 2017-04-18 17:38:08.206649710 +0200
  16296. @@ -444,6 +444,10 @@
  16297. static int __init irqfixup_setup(char *str)
  16298. {
  16299. +#ifdef CONFIG_PREEMPT_RT_BASE
  16300. + pr_warn("irqfixup boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n");
  16301. + return 1;
  16302. +#endif
  16303. irqfixup = 1;
  16304. printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n");
  16305. printk(KERN_WARNING "This may impact system performance.\n");
  16306. @@ -456,6 +460,10 @@
  16307. static int __init irqpoll_setup(char *str)
  16308. {
  16309. +#ifdef CONFIG_PREEMPT_RT_BASE
  16310. + pr_warn("irqpoll boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n");
  16311. + return 1;
  16312. +#endif
  16313. irqfixup = 2;
  16314. printk(KERN_WARNING "Misrouted IRQ fixup and polling support "
  16315. "enabled\n");
  16316. diff -Nur linux-4.4.62.orig/kernel/irq_work.c linux-4.4.62/kernel/irq_work.c
  16317. --- linux-4.4.62.orig/kernel/irq_work.c 2017-04-18 07:15:37.000000000 +0200
  16318. +++ linux-4.4.62/kernel/irq_work.c 2017-04-18 17:38:08.210649865 +0200
  16319. @@ -17,6 +17,7 @@
  16320. #include <linux/cpu.h>
  16321. #include <linux/notifier.h>
  16322. #include <linux/smp.h>
  16323. +#include <linux/interrupt.h>
  16324. #include <asm/processor.h>
  16325. @@ -65,6 +66,8 @@
  16326. */
  16327. bool irq_work_queue_on(struct irq_work *work, int cpu)
  16328. {
  16329. + struct llist_head *list;
  16330. +
  16331. /* All work should have been flushed before going offline */
  16332. WARN_ON_ONCE(cpu_is_offline(cpu));
  16333. @@ -75,7 +78,12 @@
  16334. if (!irq_work_claim(work))
  16335. return false;
  16336. - if (llist_add(&work->llnode, &per_cpu(raised_list, cpu)))
  16337. + if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL) && !(work->flags & IRQ_WORK_HARD_IRQ))
  16338. + list = &per_cpu(lazy_list, cpu);
  16339. + else
  16340. + list = &per_cpu(raised_list, cpu);
  16341. +
  16342. + if (llist_add(&work->llnode, list))
  16343. arch_send_call_function_single_ipi(cpu);
  16344. return true;
  16345. @@ -86,6 +94,9 @@
  16346. /* Enqueue the irq work @work on the current CPU */
  16347. bool irq_work_queue(struct irq_work *work)
  16348. {
  16349. + struct llist_head *list;
  16350. + bool lazy_work, realtime = IS_ENABLED(CONFIG_PREEMPT_RT_FULL);
  16351. +
  16352. /* Only queue if not already pending */
  16353. if (!irq_work_claim(work))
  16354. return false;
  16355. @@ -93,13 +104,15 @@
  16356. /* Queue the entry and raise the IPI if needed. */
  16357. preempt_disable();
  16358. - /* If the work is "lazy", handle it from next tick if any */
  16359. - if (work->flags & IRQ_WORK_LAZY) {
  16360. - if (llist_add(&work->llnode, this_cpu_ptr(&lazy_list)) &&
  16361. - tick_nohz_tick_stopped())
  16362. - arch_irq_work_raise();
  16363. - } else {
  16364. - if (llist_add(&work->llnode, this_cpu_ptr(&raised_list)))
  16365. + lazy_work = work->flags & IRQ_WORK_LAZY;
  16366. +
  16367. + if (lazy_work || (realtime && !(work->flags & IRQ_WORK_HARD_IRQ)))
  16368. + list = this_cpu_ptr(&lazy_list);
  16369. + else
  16370. + list = this_cpu_ptr(&raised_list);
  16371. +
  16372. + if (llist_add(&work->llnode, list)) {
  16373. + if (!lazy_work || tick_nohz_tick_stopped())
  16374. arch_irq_work_raise();
  16375. }
  16376. @@ -116,9 +129,8 @@
  16377. raised = this_cpu_ptr(&raised_list);
  16378. lazy = this_cpu_ptr(&lazy_list);
  16379. - if (llist_empty(raised) || arch_irq_work_has_interrupt())
  16380. - if (llist_empty(lazy))
  16381. - return false;
  16382. + if (llist_empty(raised) && llist_empty(lazy))
  16383. + return false;
  16384. /* All work should have been flushed before going offline */
  16385. WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
  16386. @@ -132,7 +144,7 @@
  16387. struct irq_work *work;
  16388. struct llist_node *llnode;
  16389. - BUG_ON(!irqs_disabled());
  16390. + BUG_ON_NONRT(!irqs_disabled());
  16391. if (llist_empty(list))
  16392. return;
  16393. @@ -169,7 +181,16 @@
  16394. void irq_work_run(void)
  16395. {
  16396. irq_work_run_list(this_cpu_ptr(&raised_list));
  16397. - irq_work_run_list(this_cpu_ptr(&lazy_list));
  16398. + if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL)) {
  16399. + /*
  16400. + * NOTE: we raise softirq via IPI for safety,
  16401. + * and execute in irq_work_tick() to move the
  16402. + * overhead from hard to soft irq context.
  16403. + */
  16404. + if (!llist_empty(this_cpu_ptr(&lazy_list)))
  16405. + raise_softirq(TIMER_SOFTIRQ);
  16406. + } else
  16407. + irq_work_run_list(this_cpu_ptr(&lazy_list));
  16408. }
  16409. EXPORT_SYMBOL_GPL(irq_work_run);
  16410. @@ -179,8 +200,17 @@
  16411. if (!llist_empty(raised) && !arch_irq_work_has_interrupt())
  16412. irq_work_run_list(raised);
  16413. +
  16414. + if (!IS_ENABLED(CONFIG_PREEMPT_RT_FULL))
  16415. + irq_work_run_list(this_cpu_ptr(&lazy_list));
  16416. +}
  16417. +
  16418. +#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL)
  16419. +void irq_work_tick_soft(void)
  16420. +{
  16421. irq_work_run_list(this_cpu_ptr(&lazy_list));
  16422. }
  16423. +#endif
  16424. /*
  16425. * Synchronize against the irq_work @entry, ensures the entry is not
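
Illustration (not part of the patch): with these hunks, irq_work flagged IRQ_WORK_HARD_IRQ (a flag added elsewhere in this patch set) stays on raised_list and still runs from hard interrupt context on PREEMPT_RT_FULL, while unflagged work is deferred to lazy_list and executed from the TIMER_SOFTIRQ via irq_work_tick_soft(). A hypothetical user that must run from the hard interrupt would look roughly like:

static void my_hard_work_fn(struct irq_work *work)
{
	/* keep this minimal: it still runs in hard interrupt context */
}

static struct irq_work my_hard_work = {
	.flags = IRQ_WORK_HARD_IRQ,
	.func  = my_hard_work_fn,
};

static void my_poke(void)
{
	/* may be called from any atomic context */
	irq_work_queue(&my_hard_work);
}
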
  16426. diff -Nur linux-4.4.62.orig/kernel/Kconfig.locks linux-4.4.62/kernel/Kconfig.locks
  16427. --- linux-4.4.62.orig/kernel/Kconfig.locks 2017-04-18 07:15:37.000000000 +0200
  16428. +++ linux-4.4.62/kernel/Kconfig.locks 2017-04-18 17:38:08.202649555 +0200
  16429. @@ -225,11 +225,11 @@
  16430. config MUTEX_SPIN_ON_OWNER
  16431. def_bool y
  16432. - depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW
  16433. + depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW && !PREEMPT_RT_FULL
  16434. config RWSEM_SPIN_ON_OWNER
  16435. def_bool y
  16436. - depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW
  16437. + depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW && !PREEMPT_RT_FULL
  16438. config LOCK_SPIN_ON_OWNER
  16439. def_bool y
  16440. diff -Nur linux-4.4.62.orig/kernel/Kconfig.preempt linux-4.4.62/kernel/Kconfig.preempt
  16441. --- linux-4.4.62.orig/kernel/Kconfig.preempt 2017-04-18 07:15:37.000000000 +0200
  16442. +++ linux-4.4.62/kernel/Kconfig.preempt 2017-04-18 17:38:08.202649555 +0200
  16443. @@ -1,3 +1,16 @@
  16444. +config PREEMPT
  16445. + bool
  16446. + select PREEMPT_COUNT
  16447. +
  16448. +config PREEMPT_RT_BASE
  16449. + bool
  16450. + select PREEMPT
  16451. +
  16452. +config HAVE_PREEMPT_LAZY
  16453. + bool
  16454. +
  16455. +config PREEMPT_LAZY
  16456. + def_bool y if HAVE_PREEMPT_LAZY && PREEMPT_RT_FULL
  16457. choice
  16458. prompt "Preemption Model"
  16459. @@ -33,9 +46,9 @@
  16460. Select this if you are building a kernel for a desktop system.
  16461. -config PREEMPT
  16462. +config PREEMPT__LL
  16463. bool "Preemptible Kernel (Low-Latency Desktop)"
  16464. - select PREEMPT_COUNT
  16465. + select PREEMPT
  16466. select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
  16467. help
  16468. This option reduces the latency of the kernel by making
  16469. @@ -52,6 +65,22 @@
  16470. embedded system with latency requirements in the milliseconds
  16471. range.
  16472. +config PREEMPT_RTB
  16473. + bool "Preemptible Kernel (Basic RT)"
  16474. + select PREEMPT_RT_BASE
  16475. + help
  16476. + This option is basically the same as (Low-Latency Desktop) but
  16477. + enables changes which are preliminary for the full preemptible
  16478. + RT kernel.
  16479. +
  16480. +config PREEMPT_RT_FULL
  16481. + bool "Fully Preemptible Kernel (RT)"
  16482. + depends on IRQ_FORCED_THREADING
  16483. + select PREEMPT_RT_BASE
  16484. + select PREEMPT_RCU
  16485. + help
  16486. + All and everything
  16487. +
  16488. endchoice
  16489. config PREEMPT_COUNT
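
For illustration only: on an architecture that selects IRQ_FORCED_THREADING (and, where available, HAVE_PREEMPT_LAZY), choosing the new "Fully Preemptible Kernel (RT)" entry typically yields a configuration fragment along these lines:

    CONFIG_PREEMPT_RT_FULL=y
    CONFIG_PREEMPT_RT_BASE=y
    CONFIG_PREEMPT=y
    CONFIG_PREEMPT_COUNT=y
    CONFIG_PREEMPT_RCU=y
    CONFIG_PREEMPT_LAZY=y
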
  16490. diff -Nur linux-4.4.62.orig/kernel/ksysfs.c linux-4.4.62/kernel/ksysfs.c
  16491. --- linux-4.4.62.orig/kernel/ksysfs.c 2017-04-18 07:15:37.000000000 +0200
  16492. +++ linux-4.4.62/kernel/ksysfs.c 2017-04-18 17:38:08.210649865 +0200
  16493. @@ -136,6 +136,15 @@
  16494. #endif /* CONFIG_KEXEC_CORE */
  16495. +#if defined(CONFIG_PREEMPT_RT_FULL)
  16496. +static ssize_t realtime_show(struct kobject *kobj,
  16497. + struct kobj_attribute *attr, char *buf)
  16498. +{
  16499. + return sprintf(buf, "%d\n", 1);
  16500. +}
  16501. +KERNEL_ATTR_RO(realtime);
  16502. +#endif
  16503. +
  16504. /* whether file capabilities are enabled */
  16505. static ssize_t fscaps_show(struct kobject *kobj,
  16506. struct kobj_attribute *attr, char *buf)
  16507. @@ -203,6 +212,9 @@
  16508. &vmcoreinfo_attr.attr,
  16509. #endif
  16510. &rcu_expedited_attr.attr,
  16511. +#ifdef CONFIG_PREEMPT_RT_FULL
  16512. + &realtime_attr.attr,
  16513. +#endif
  16514. NULL
  16515. };
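
Usage note: the new attribute gives user space a simple probe for an RT kernel. On a PREEMPT_RT_FULL kernel, reading /sys/kernel/realtime returns "1"; on any other kernel the file does not exist, so checking for its presence is sufficient.
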
  16516. diff -Nur linux-4.4.62.orig/kernel/locking/lglock.c linux-4.4.62/kernel/locking/lglock.c
  16517. --- linux-4.4.62.orig/kernel/locking/lglock.c 2017-04-18 07:15:37.000000000 +0200
  16518. +++ linux-4.4.62/kernel/locking/lglock.c 2017-04-18 17:38:08.210649865 +0200
  16519. @@ -4,6 +4,15 @@
  16520. #include <linux/cpu.h>
  16521. #include <linux/string.h>
  16522. +#ifndef CONFIG_PREEMPT_RT_FULL
  16523. +# define lg_lock_ptr arch_spinlock_t
  16524. +# define lg_do_lock(l) arch_spin_lock(l)
  16525. +# define lg_do_unlock(l) arch_spin_unlock(l)
  16526. +#else
  16527. +# define lg_lock_ptr struct rt_mutex
  16528. +# define lg_do_lock(l) __rt_spin_lock__no_mg(l)
  16529. +# define lg_do_unlock(l) __rt_spin_unlock(l)
  16530. +#endif
  16531. /*
  16532. * Note there is no uninit, so lglocks cannot be defined in
  16533. * modules (but it's fine to use them from there)
  16534. @@ -12,51 +21,60 @@
  16535. void lg_lock_init(struct lglock *lg, char *name)
  16536. {
  16537. +#ifdef CONFIG_PREEMPT_RT_FULL
  16538. + int i;
  16539. +
  16540. + for_each_possible_cpu(i) {
  16541. + struct rt_mutex *lock = per_cpu_ptr(lg->lock, i);
  16542. +
  16543. + rt_mutex_init(lock);
  16544. + }
  16545. +#endif
  16546. LOCKDEP_INIT_MAP(&lg->lock_dep_map, name, &lg->lock_key, 0);
  16547. }
  16548. EXPORT_SYMBOL(lg_lock_init);
  16549. void lg_local_lock(struct lglock *lg)
  16550. {
  16551. - arch_spinlock_t *lock;
  16552. + lg_lock_ptr *lock;
  16553. - preempt_disable();
  16554. + migrate_disable();
  16555. lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
  16556. lock = this_cpu_ptr(lg->lock);
  16557. - arch_spin_lock(lock);
  16558. + lg_do_lock(lock);
  16559. }
  16560. EXPORT_SYMBOL(lg_local_lock);
  16561. void lg_local_unlock(struct lglock *lg)
  16562. {
  16563. - arch_spinlock_t *lock;
  16564. + lg_lock_ptr *lock;
  16565. lock_release(&lg->lock_dep_map, 1, _RET_IP_);
  16566. lock = this_cpu_ptr(lg->lock);
  16567. - arch_spin_unlock(lock);
  16568. - preempt_enable();
  16569. + lg_do_unlock(lock);
  16570. + migrate_enable();
  16571. }
  16572. EXPORT_SYMBOL(lg_local_unlock);
  16573. void lg_local_lock_cpu(struct lglock *lg, int cpu)
  16574. {
  16575. - arch_spinlock_t *lock;
  16576. + lg_lock_ptr *lock;
  16577. - preempt_disable();
  16578. + preempt_disable_nort();
  16579. lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
  16580. lock = per_cpu_ptr(lg->lock, cpu);
  16581. - arch_spin_lock(lock);
  16582. + lg_do_lock(lock);
  16583. }
  16584. EXPORT_SYMBOL(lg_local_lock_cpu);
  16585. void lg_local_unlock_cpu(struct lglock *lg, int cpu)
  16586. {
  16587. - arch_spinlock_t *lock;
  16588. + lg_lock_ptr *lock;
  16589. lock_release(&lg->lock_dep_map, 1, _RET_IP_);
  16590. lock = per_cpu_ptr(lg->lock, cpu);
  16591. - arch_spin_unlock(lock);
  16592. - preempt_enable();
  16593. + lg_do_unlock(lock);
  16594. + preempt_enable_nort();
  16595. }
  16596. EXPORT_SYMBOL(lg_local_unlock_cpu);
  16597. @@ -68,30 +86,30 @@
  16598. if (cpu2 < cpu1)
  16599. swap(cpu1, cpu2);
  16600. - preempt_disable();
  16601. + preempt_disable_nort();
  16602. lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
  16603. - arch_spin_lock(per_cpu_ptr(lg->lock, cpu1));
  16604. - arch_spin_lock(per_cpu_ptr(lg->lock, cpu2));
  16605. + lg_do_lock(per_cpu_ptr(lg->lock, cpu1));
  16606. + lg_do_lock(per_cpu_ptr(lg->lock, cpu2));
  16607. }
  16608. void lg_double_unlock(struct lglock *lg, int cpu1, int cpu2)
  16609. {
  16610. lock_release(&lg->lock_dep_map, 1, _RET_IP_);
  16611. - arch_spin_unlock(per_cpu_ptr(lg->lock, cpu1));
  16612. - arch_spin_unlock(per_cpu_ptr(lg->lock, cpu2));
  16613. - preempt_enable();
  16614. + lg_do_unlock(per_cpu_ptr(lg->lock, cpu1));
  16615. + lg_do_unlock(per_cpu_ptr(lg->lock, cpu2));
  16616. + preempt_enable_nort();
  16617. }
  16618. void lg_global_lock(struct lglock *lg)
  16619. {
  16620. int i;
  16621. - preempt_disable();
  16622. + preempt_disable_nort();
  16623. lock_acquire_exclusive(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
  16624. for_each_possible_cpu(i) {
  16625. - arch_spinlock_t *lock;
  16626. + lg_lock_ptr *lock;
  16627. lock = per_cpu_ptr(lg->lock, i);
  16628. - arch_spin_lock(lock);
  16629. + lg_do_lock(lock);
  16630. }
  16631. }
  16632. EXPORT_SYMBOL(lg_global_lock);
  16633. @@ -102,10 +120,35 @@
  16634. lock_release(&lg->lock_dep_map, 1, _RET_IP_);
  16635. for_each_possible_cpu(i) {
  16636. - arch_spinlock_t *lock;
  16637. + lg_lock_ptr *lock;
  16638. lock = per_cpu_ptr(lg->lock, i);
  16639. - arch_spin_unlock(lock);
  16640. + lg_do_unlock(lock);
  16641. }
  16642. - preempt_enable();
  16643. + preempt_enable_nort();
  16644. }
  16645. EXPORT_SYMBOL(lg_global_unlock);
  16646. +
  16647. +#ifdef CONFIG_PREEMPT_RT_FULL
  16648. +/*
  16649. + * HACK: If you use this, you get to keep the pieces.
  16650. + * Used in queue_stop_cpus_work() when the stop machinery
  16651. + * is called from an inactive CPU, so we can't schedule.
  16652. + */
  16653. +# define lg_do_trylock_relax(l) \
  16654. + do { \
  16655. + while (!__rt_spin_trylock(l)) \
  16656. + cpu_relax(); \
  16657. + } while (0)
  16658. +
  16659. +void lg_global_trylock_relax(struct lglock *lg)
  16660. +{
  16661. + int i;
  16662. +
  16663. + lock_acquire_exclusive(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
  16664. + for_each_possible_cpu(i) {
  16665. + lg_lock_ptr *lock;
  16666. + lock = per_cpu_ptr(lg->lock, i);
  16667. + lg_do_trylock_relax(lock);
  16668. + }
  16669. +}
  16670. +#endif
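
Illustration (not part of the patch): callers of the lglock API are unchanged by this conversion; only the per-CPU lock type and the preemption handling differ under PREEMPT_RT_FULL. A hypothetical user looks roughly like:

DEFINE_LGLOCK(my_lglock);

static void my_lglock_example_init(void)
{
	lg_lock_init(&my_lglock, "my_lglock");
}

static void my_lglock_example_use(void)
{
	lg_local_lock(&my_lglock);	/* take this CPU's lock only */
	/* ... touch this CPU's part of the data ... */
	lg_local_unlock(&my_lglock);

	lg_global_lock(&my_lglock);	/* take every CPU's lock */
	/* ... walk all CPUs' data ... */
	lg_global_unlock(&my_lglock);
}
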
  16671. diff -Nur linux-4.4.62.orig/kernel/locking/lockdep.c linux-4.4.62/kernel/locking/lockdep.c
  16672. --- linux-4.4.62.orig/kernel/locking/lockdep.c 2017-04-18 07:15:37.000000000 +0200
  16673. +++ linux-4.4.62/kernel/locking/lockdep.c 2017-04-18 17:38:08.210649865 +0200
  16674. @@ -668,6 +668,7 @@
  16675. struct lockdep_subclass_key *key;
  16676. struct list_head *hash_head;
  16677. struct lock_class *class;
  16678. + bool is_static = false;
  16679. #ifdef CONFIG_DEBUG_LOCKDEP
  16680. /*
  16681. @@ -695,10 +696,23 @@
  16682. /*
  16683. * Static locks do not have their class-keys yet - for them the key
  16684. - * is the lock object itself:
  16685. - */
  16686. - if (unlikely(!lock->key))
  16687. - lock->key = (void *)lock;
  16688. + * is the lock object itself. If the lock is in the per cpu area,
  16689. + * the canonical address of the lock (per cpu offset removed) is
  16690. + * used.
  16691. + */
  16692. + if (unlikely(!lock->key)) {
  16693. + unsigned long can_addr, addr = (unsigned long)lock;
  16694. +
  16695. + if (__is_kernel_percpu_address(addr, &can_addr))
  16696. + lock->key = (void *)can_addr;
  16697. + else if (__is_module_percpu_address(addr, &can_addr))
  16698. + lock->key = (void *)can_addr;
  16699. + else if (static_obj(lock))
  16700. + lock->key = (void *)lock;
  16701. + else
  16702. + return ERR_PTR(-EINVAL);
  16703. + is_static = true;
  16704. + }
  16705. /*
  16706. * NOTE: the class-key must be unique. For dynamic locks, a static
  16707. @@ -730,7 +744,7 @@
  16708. }
  16709. }
  16710. - return NULL;
  16711. + return is_static || static_obj(lock->key) ? NULL : ERR_PTR(-EINVAL);
  16712. }
  16713. /*
  16714. @@ -748,19 +762,18 @@
  16715. DEBUG_LOCKS_WARN_ON(!irqs_disabled());
  16716. class = look_up_lock_class(lock, subclass);
  16717. - if (likely(class))
  16718. + if (likely(!IS_ERR_OR_NULL(class)))
  16719. goto out_set_class_cache;
  16720. /*
  16721. * Debug-check: all keys must be persistent!
  16722. - */
  16723. - if (!static_obj(lock->key)) {
  16724. + */
  16725. + if (IS_ERR(class)) {
  16726. debug_locks_off();
  16727. printk("INFO: trying to register non-static key.\n");
  16728. printk("the code is fine but needs lockdep annotation.\n");
  16729. printk("turning off the locking correctness validator.\n");
  16730. dump_stack();
  16731. -
  16732. return NULL;
  16733. }
  16734. @@ -3278,7 +3291,7 @@
  16735. * Clearly if the lock hasn't been acquired _ever_, we're not
  16736. * holding it either, so report failure.
  16737. */
  16738. - if (!class)
  16739. + if (IS_ERR_OR_NULL(class))
  16740. return 0;
  16741. /*
  16742. @@ -3525,6 +3538,7 @@
  16743. }
  16744. }
  16745. +#ifndef CONFIG_PREEMPT_RT_FULL
  16746. /*
  16747. * We dont accurately track softirq state in e.g.
  16748. * hardirq contexts (such as on 4KSTACKS), so only
  16749. @@ -3539,6 +3553,7 @@
  16750. DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled);
  16751. }
  16752. }
  16753. +#endif
  16754. if (!debug_locks)
  16755. print_irqtrace_events(current);
  16756. @@ -3977,7 +3992,7 @@
  16757. * If the class exists we look it up and zap it:
  16758. */
  16759. class = look_up_lock_class(lock, j);
  16760. - if (class)
  16761. + if (!IS_ERR_OR_NULL(class))
  16762. zap_class(class);
  16763. }
  16764. /*
  16765. diff -Nur linux-4.4.62.orig/kernel/locking/locktorture.c linux-4.4.62/kernel/locking/locktorture.c
  16766. --- linux-4.4.62.orig/kernel/locking/locktorture.c 2017-04-18 07:15:37.000000000 +0200
  16767. +++ linux-4.4.62/kernel/locking/locktorture.c 2017-04-18 17:38:08.210649865 +0200
  16768. @@ -26,7 +26,6 @@
  16769. #include <linux/kthread.h>
  16770. #include <linux/sched/rt.h>
  16771. #include <linux/spinlock.h>
  16772. -#include <linux/rwlock.h>
  16773. #include <linux/mutex.h>
  16774. #include <linux/rwsem.h>
  16775. #include <linux/smp.h>
  16776. diff -Nur linux-4.4.62.orig/kernel/locking/Makefile linux-4.4.62/kernel/locking/Makefile
  16777. --- linux-4.4.62.orig/kernel/locking/Makefile 2017-04-18 07:15:37.000000000 +0200
  16778. +++ linux-4.4.62/kernel/locking/Makefile 2017-04-18 17:38:08.210649865 +0200
  16779. @@ -1,5 +1,5 @@
  16780. -obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o
  16781. +obj-y += semaphore.o percpu-rwsem.o
  16782. ifdef CONFIG_FUNCTION_TRACER
  16783. CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE)
  16784. @@ -8,7 +8,11 @@
  16785. CFLAGS_REMOVE_rtmutex-debug.o = $(CC_FLAGS_FTRACE)
  16786. endif
  16787. +ifneq ($(CONFIG_PREEMPT_RT_FULL),y)
  16788. +obj-y += mutex.o
  16789. obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
  16790. +obj-y += rwsem.o
  16791. +endif
  16792. obj-$(CONFIG_LOCKDEP) += lockdep.o
  16793. ifeq ($(CONFIG_PROC_FS),y)
  16794. obj-$(CONFIG_LOCKDEP) += lockdep_proc.o
  16795. @@ -22,7 +26,10 @@
  16796. obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
  16797. obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
  16798. obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
  16799. +ifneq ($(CONFIG_PREEMPT_RT_FULL),y)
  16800. obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
  16801. obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
  16802. +endif
  16803. +obj-$(CONFIG_PREEMPT_RT_FULL) += rt.o
  16804. obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o
  16805. obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o
  16806. diff -Nur linux-4.4.62.orig/kernel/locking/rt.c linux-4.4.62/kernel/locking/rt.c
  16807. --- linux-4.4.62.orig/kernel/locking/rt.c 1970-01-01 01:00:00.000000000 +0100
  16808. +++ linux-4.4.62/kernel/locking/rt.c 2017-04-18 17:38:08.210649865 +0200
  16809. @@ -0,0 +1,474 @@
  16810. +/*
  16811. + * kernel/rt.c
  16812. + *
  16813. + * Real-Time Preemption Support
  16814. + *
  16815. + * started by Ingo Molnar:
  16816. + *
  16817. + * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
  16818. + * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
  16819. + *
  16820. + * historic credit for proving that Linux spinlocks can be implemented via
  16821. + * RT-aware mutexes goes to many people: The Pmutex project (Dirk Grambow
  16822. + * and others) who prototyped it on 2.4 and did lots of comparative
  16823. + * research and analysis; TimeSys, for proving that you can implement a
  16824. + * fully preemptible kernel via the use of IRQ threading and mutexes;
  16825. + * Bill Huey for persuasively arguing on lkml that the mutex model is the
  16826. + * right one; and to MontaVista, who ported pmutexes to 2.6.
  16827. + *
  16828. + * This code is a from-scratch implementation and is not based on pmutexes,
  16829. + * but the idea of converting spinlocks to mutexes is used here too.
  16830. + *
  16831. + * lock debugging, locking tree, deadlock detection:
  16832. + *
  16833. + * Copyright (C) 2004, LynuxWorks, Inc., Igor Manyilov, Bill Huey
  16834. + * Released under the General Public License (GPL).
  16835. + *
  16836. + * Includes portions of the generic R/W semaphore implementation from:
  16837. + *
  16838. + * Copyright (c) 2001 David Howells (dhowells@redhat.com).
  16839. + * - Derived partially from idea by Andrea Arcangeli <andrea@suse.de>
  16840. + * - Derived also from comments by Linus
  16841. + *
  16842. + * Pending ownership of locks and ownership stealing:
  16843. + *
  16844. + * Copyright (C) 2005, Kihon Technologies Inc., Steven Rostedt
  16845. + *
  16846. + * (also by Steven Rostedt)
  16847. + * - Converted single pi_lock to individual task locks.
  16848. + *
  16849. + * By Esben Nielsen:
  16850. + * Doing priority inheritance with help of the scheduler.
  16851. + *
  16852. + * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
  16853. + * - major rework based on Esben Nielsen's initial patch
  16854. + * - replaced thread_info references by task_struct refs
  16855. + * - removed task->pending_owner dependency
  16856. + * - BKL drop/reacquire for semaphore style locks to avoid deadlocks
  16857. + * in the scheduler return path as discussed with Steven Rostedt
  16858. + *
  16859. + * Copyright (C) 2006, Kihon Technologies Inc.
  16860. + * Steven Rostedt <rostedt@goodmis.org>
  16861. + * - debugged and patched Thomas Gleixner's rework.
  16862. + * - added back the cmpxchg to the rework.
  16863. + * - turned atomic require back on for SMP.
  16864. + */
  16865. +
  16866. +#include <linux/spinlock.h>
  16867. +#include <linux/rtmutex.h>
  16868. +#include <linux/sched.h>
  16869. +#include <linux/delay.h>
  16870. +#include <linux/module.h>
  16871. +#include <linux/kallsyms.h>
  16872. +#include <linux/syscalls.h>
  16873. +#include <linux/interrupt.h>
  16874. +#include <linux/plist.h>
  16875. +#include <linux/fs.h>
  16876. +#include <linux/futex.h>
  16877. +#include <linux/hrtimer.h>
  16878. +
  16879. +#include "rtmutex_common.h"
  16880. +
  16881. +/*
  16882. + * struct mutex functions
  16883. + */
  16884. +void __mutex_do_init(struct mutex *mutex, const char *name,
  16885. + struct lock_class_key *key)
  16886. +{
  16887. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  16888. + /*
  16889. + * Make sure we are not reinitializing a held lock:
  16890. + */
  16891. + debug_check_no_locks_freed((void *)mutex, sizeof(*mutex));
  16892. + lockdep_init_map(&mutex->dep_map, name, key, 0);
  16893. +#endif
  16894. + mutex->lock.save_state = 0;
  16895. +}
  16896. +EXPORT_SYMBOL(__mutex_do_init);
  16897. +
  16898. +void __lockfunc _mutex_lock(struct mutex *lock)
  16899. +{
  16900. + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
  16901. + rt_mutex_lock(&lock->lock);
  16902. +}
  16903. +EXPORT_SYMBOL(_mutex_lock);
  16904. +
  16905. +int __lockfunc _mutex_lock_interruptible(struct mutex *lock)
  16906. +{
  16907. + int ret;
  16908. +
  16909. + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
  16910. + ret = rt_mutex_lock_interruptible(&lock->lock);
  16911. + if (ret)
  16912. + mutex_release(&lock->dep_map, 1, _RET_IP_);
  16913. + return ret;
  16914. +}
  16915. +EXPORT_SYMBOL(_mutex_lock_interruptible);
  16916. +
  16917. +int __lockfunc _mutex_lock_killable(struct mutex *lock)
  16918. +{
  16919. + int ret;
  16920. +
  16921. + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
  16922. + ret = rt_mutex_lock_killable(&lock->lock);
  16923. + if (ret)
  16924. + mutex_release(&lock->dep_map, 1, _RET_IP_);
  16925. + return ret;
  16926. +}
  16927. +EXPORT_SYMBOL(_mutex_lock_killable);
  16928. +
  16929. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  16930. +void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass)
  16931. +{
  16932. + mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_);
  16933. + rt_mutex_lock(&lock->lock);
  16934. +}
  16935. +EXPORT_SYMBOL(_mutex_lock_nested);
  16936. +
  16937. +void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest)
  16938. +{
  16939. + mutex_acquire_nest(&lock->dep_map, 0, 0, nest, _RET_IP_);
  16940. + rt_mutex_lock(&lock->lock);
  16941. +}
  16942. +EXPORT_SYMBOL(_mutex_lock_nest_lock);
  16943. +
  16944. +int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass)
  16945. +{
  16946. + int ret;
  16947. +
  16948. + mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_);
  16949. + ret = rt_mutex_lock_interruptible(&lock->lock);
  16950. + if (ret)
  16951. + mutex_release(&lock->dep_map, 1, _RET_IP_);
  16952. + return ret;
  16953. +}
  16954. +EXPORT_SYMBOL(_mutex_lock_interruptible_nested);
  16955. +
  16956. +int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass)
  16957. +{
  16958. + int ret;
  16959. +
  16960. + mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
  16961. + ret = rt_mutex_lock_killable(&lock->lock);
  16962. + if (ret)
  16963. + mutex_release(&lock->dep_map, 1, _RET_IP_);
  16964. + return ret;
  16965. +}
  16966. +EXPORT_SYMBOL(_mutex_lock_killable_nested);
  16967. +#endif
  16968. +
  16969. +int __lockfunc _mutex_trylock(struct mutex *lock)
  16970. +{
  16971. + int ret = rt_mutex_trylock(&lock->lock);
  16972. +
  16973. + if (ret)
  16974. + mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
  16975. +
  16976. + return ret;
  16977. +}
  16978. +EXPORT_SYMBOL(_mutex_trylock);
  16979. +
  16980. +void __lockfunc _mutex_unlock(struct mutex *lock)
  16981. +{
  16982. + mutex_release(&lock->dep_map, 1, _RET_IP_);
  16983. + rt_mutex_unlock(&lock->lock);
  16984. +}
  16985. +EXPORT_SYMBOL(_mutex_unlock);
  16986. +
  16987. +/*
  16988. + * rwlock_t functions
  16989. + */
  16990. +int __lockfunc rt_write_trylock(rwlock_t *rwlock)
  16991. +{
  16992. + int ret;
  16993. +
  16994. + migrate_disable();
  16995. + ret = rt_mutex_trylock(&rwlock->lock);
  16996. + if (ret)
  16997. + rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
  16998. + else
  16999. + migrate_enable();
  17000. +
  17001. + return ret;
  17002. +}
  17003. +EXPORT_SYMBOL(rt_write_trylock);
  17004. +
  17005. +int __lockfunc rt_write_trylock_irqsave(rwlock_t *rwlock, unsigned long *flags)
  17006. +{
  17007. + int ret;
  17008. +
  17009. + *flags = 0;
  17010. + ret = rt_write_trylock(rwlock);
  17011. + return ret;
  17012. +}
  17013. +EXPORT_SYMBOL(rt_write_trylock_irqsave);
  17014. +
  17015. +int __lockfunc rt_read_trylock(rwlock_t *rwlock)
  17016. +{
  17017. + struct rt_mutex *lock = &rwlock->lock;
  17018. + int ret = 1;
  17019. +
  17020. + /*
  17021. + * recursive read locks succeed when current owns the lock,
  17022. + * but not when read_depth == 0 which means that the lock is
  17023. + * write locked.
  17024. + */
  17025. + if (rt_mutex_owner(lock) != current) {
  17026. + migrate_disable();
  17027. + ret = rt_mutex_trylock(lock);
  17028. + if (ret)
  17029. + rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
  17030. + else
  17031. + migrate_enable();
  17032. +
  17033. + } else if (!rwlock->read_depth) {
  17034. + ret = 0;
  17035. + }
  17036. +
  17037. + if (ret)
  17038. + rwlock->read_depth++;
  17039. +
  17040. + return ret;
  17041. +}
  17042. +EXPORT_SYMBOL(rt_read_trylock);
  17043. +
  17044. +void __lockfunc rt_write_lock(rwlock_t *rwlock)
  17045. +{
  17046. + rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
  17047. + __rt_spin_lock(&rwlock->lock);
  17048. +}
  17049. +EXPORT_SYMBOL(rt_write_lock);
  17050. +
  17051. +void __lockfunc rt_read_lock(rwlock_t *rwlock)
  17052. +{
  17053. + struct rt_mutex *lock = &rwlock->lock;
  17054. +
  17055. +
  17056. + /*
  17057. + * recursive read locks succeed when current owns the lock
  17058. + */
  17059. + if (rt_mutex_owner(lock) != current) {
  17060. + rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
  17061. + __rt_spin_lock(lock);
  17062. + }
  17063. + rwlock->read_depth++;
  17064. +}
  17065. +
  17066. +EXPORT_SYMBOL(rt_read_lock);
  17067. +
  17068. +void __lockfunc rt_write_unlock(rwlock_t *rwlock)
  17069. +{
  17070. + /* NOTE: we always pass in '1' for nested, for simplicity */
  17071. + rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
  17072. + __rt_spin_unlock(&rwlock->lock);
  17073. + migrate_enable();
  17074. +}
  17075. +EXPORT_SYMBOL(rt_write_unlock);
  17076. +
  17077. +void __lockfunc rt_read_unlock(rwlock_t *rwlock)
  17078. +{
  17079. + /* Release the lock only when read_depth is down to 0 */
  17080. + if (--rwlock->read_depth == 0) {
  17081. + rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
  17082. + __rt_spin_unlock(&rwlock->lock);
  17083. + migrate_enable();
  17084. + }
  17085. +}
  17086. +EXPORT_SYMBOL(rt_read_unlock);
  17087. +
  17088. +unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock)
  17089. +{
  17090. + rt_write_lock(rwlock);
  17091. +
  17092. + return 0;
  17093. +}
  17094. +EXPORT_SYMBOL(rt_write_lock_irqsave);
  17095. +
  17096. +unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock)
  17097. +{
  17098. + rt_read_lock(rwlock);
  17099. +
  17100. + return 0;
  17101. +}
  17102. +EXPORT_SYMBOL(rt_read_lock_irqsave);
  17103. +
  17104. +void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key)
  17105. +{
  17106. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  17107. + /*
  17108. + * Make sure we are not reinitializing a held lock:
  17109. + */
  17110. + debug_check_no_locks_freed((void *)rwlock, sizeof(*rwlock));
  17111. + lockdep_init_map(&rwlock->dep_map, name, key, 0);
  17112. +#endif
  17113. + rwlock->lock.save_state = 1;
  17114. + rwlock->read_depth = 0;
  17115. +}
  17116. +EXPORT_SYMBOL(__rt_rwlock_init);
  17117. +
  17118. +/*
  17119. + * rw_semaphores
  17120. + */
  17121. +
  17122. +void rt_up_write(struct rw_semaphore *rwsem)
  17123. +{
  17124. + rwsem_release(&rwsem->dep_map, 1, _RET_IP_);
  17125. + rt_mutex_unlock(&rwsem->lock);
  17126. +}
  17127. +EXPORT_SYMBOL(rt_up_write);
  17128. +
  17129. +void __rt_up_read(struct rw_semaphore *rwsem)
  17130. +{
  17131. + if (--rwsem->read_depth == 0)
  17132. + rt_mutex_unlock(&rwsem->lock);
  17133. +}
  17134. +
  17135. +void rt_up_read(struct rw_semaphore *rwsem)
  17136. +{
  17137. + rwsem_release(&rwsem->dep_map, 1, _RET_IP_);
  17138. + __rt_up_read(rwsem);
  17139. +}
  17140. +EXPORT_SYMBOL(rt_up_read);
  17141. +
  17142. +/*
  17143. + * downgrade a write lock into a read lock
  17144. + * - just wake up any readers at the front of the queue
  17145. + */
  17146. +void rt_downgrade_write(struct rw_semaphore *rwsem)
  17147. +{
  17148. + BUG_ON(rt_mutex_owner(&rwsem->lock) != current);
  17149. + rwsem->read_depth = 1;
  17150. +}
  17151. +EXPORT_SYMBOL(rt_downgrade_write);
  17152. +
  17153. +int rt_down_write_trylock(struct rw_semaphore *rwsem)
  17154. +{
  17155. + int ret = rt_mutex_trylock(&rwsem->lock);
  17156. +
  17157. + if (ret)
  17158. + rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_);
  17159. + return ret;
  17160. +}
  17161. +EXPORT_SYMBOL(rt_down_write_trylock);
  17162. +
  17163. +void rt_down_write(struct rw_semaphore *rwsem)
  17164. +{
  17165. + rwsem_acquire(&rwsem->dep_map, 0, 0, _RET_IP_);
  17166. + rt_mutex_lock(&rwsem->lock);
  17167. +}
  17168. +EXPORT_SYMBOL(rt_down_write);
  17169. +
  17170. +void rt_down_write_nested(struct rw_semaphore *rwsem, int subclass)
  17171. +{
  17172. + rwsem_acquire(&rwsem->dep_map, subclass, 0, _RET_IP_);
  17173. + rt_mutex_lock(&rwsem->lock);
  17174. +}
  17175. +EXPORT_SYMBOL(rt_down_write_nested);
  17176. +
  17177. +void rt_down_write_nested_lock(struct rw_semaphore *rwsem,
  17178. + struct lockdep_map *nest)
  17179. +{
  17180. + rwsem_acquire_nest(&rwsem->dep_map, 0, 0, nest, _RET_IP_);
  17181. + rt_mutex_lock(&rwsem->lock);
  17182. +}
  17183. +EXPORT_SYMBOL(rt_down_write_nested_lock);
  17184. +
  17185. +int rt__down_read_trylock(struct rw_semaphore *rwsem)
  17186. +{
  17187. + struct rt_mutex *lock = &rwsem->lock;
  17188. + int ret = 1;
  17189. +
  17190. + /*
  17191. + * recursive read locks succeed when current owns the rwsem,
  17192. + * but not when read_depth == 0 which means that the rwsem is
  17193. + * write locked.
  17194. + */
  17195. + if (rt_mutex_owner(lock) != current)
  17196. + ret = rt_mutex_trylock(&rwsem->lock);
  17197. + else if (!rwsem->read_depth)
  17198. + ret = 0;
  17199. +
  17200. + if (ret)
  17201. + rwsem->read_depth++;
  17202. + return ret;
  17203. +
  17204. +}
  17205. +
  17206. +int rt_down_read_trylock(struct rw_semaphore *rwsem)
  17207. +{
  17208. + int ret;
  17209. +
  17210. + ret = rt__down_read_trylock(rwsem);
  17211. + if (ret)
  17212. + rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_);
  17213. +
  17214. + return ret;
  17215. +}
  17216. +EXPORT_SYMBOL(rt_down_read_trylock);
  17217. +
  17218. +void rt__down_read(struct rw_semaphore *rwsem)
  17219. +{
  17220. + struct rt_mutex *lock = &rwsem->lock;
  17221. +
  17222. + if (rt_mutex_owner(lock) != current)
  17223. + rt_mutex_lock(&rwsem->lock);
  17224. + rwsem->read_depth++;
  17225. +}
  17226. +EXPORT_SYMBOL(rt__down_read);
  17227. +
  17228. +static void __rt_down_read(struct rw_semaphore *rwsem, int subclass)
  17229. +{
  17230. + rwsem_acquire_read(&rwsem->dep_map, subclass, 0, _RET_IP_);
  17231. + rt__down_read(rwsem);
  17232. +}
  17233. +
  17234. +void rt_down_read(struct rw_semaphore *rwsem)
  17235. +{
  17236. + __rt_down_read(rwsem, 0);
  17237. +}
  17238. +EXPORT_SYMBOL(rt_down_read);
  17239. +
  17240. +void rt_down_read_nested(struct rw_semaphore *rwsem, int subclass)
  17241. +{
  17242. + __rt_down_read(rwsem, subclass);
  17243. +}
  17244. +EXPORT_SYMBOL(rt_down_read_nested);
  17245. +
  17246. +void __rt_rwsem_init(struct rw_semaphore *rwsem, const char *name,
  17247. + struct lock_class_key *key)
  17248. +{
  17249. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  17250. + /*
  17251. + * Make sure we are not reinitializing a held lock:
  17252. + */
  17253. + debug_check_no_locks_freed((void *)rwsem, sizeof(*rwsem));
  17254. + lockdep_init_map(&rwsem->dep_map, name, key, 0);
  17255. +#endif
  17256. + rwsem->read_depth = 0;
  17257. + rwsem->lock.save_state = 0;
  17258. +}
  17259. +EXPORT_SYMBOL(__rt_rwsem_init);
  17260. +
  17261. +/**
  17262. + * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0
  17263. + * @cnt: the atomic which we are to dec
  17264. + * @lock: the mutex to return holding if we dec to 0
  17265. + *
  17266. + * return true and hold lock if we dec to 0, return false otherwise
  17267. + */
  17268. +int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock)
  17269. +{
  17270. + /* dec if we can't possibly hit 0 */
  17271. + if (atomic_add_unless(cnt, -1, 1))
  17272. + return 0;
  17273. + /* we might hit 0, so take the lock */
  17274. + mutex_lock(lock);
  17275. + if (!atomic_dec_and_test(cnt)) {
  17276. + /* when we actually did the dec, we didn't hit 0 */
  17277. + mutex_unlock(lock);
  17278. + return 0;
  17279. + }
  17280. + /* we hit 0, and we hold the lock */
  17281. + return 1;
  17282. +}
  17283. +EXPORT_SYMBOL(atomic_dec_and_mutex_lock);
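
Illustration (not part of the patch): the usual pattern for atomic_dec_and_mutex_lock() defined above is dropping a reference and taking the mutex only when the count may reach zero; the names below are hypothetical:

static atomic_t my_refcount = ATOMIC_INIT(1);
static DEFINE_MUTEX(my_teardown_lock);

static void my_put(void)
{
	if (atomic_dec_and_mutex_lock(&my_refcount, &my_teardown_lock)) {
		/* the last reference is gone: tear down under the mutex */
		mutex_unlock(&my_teardown_lock);
	}
}
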
  17284. diff -Nur linux-4.4.62.orig/kernel/locking/rtmutex.c linux-4.4.62/kernel/locking/rtmutex.c
  17285. --- linux-4.4.62.orig/kernel/locking/rtmutex.c 2017-04-18 07:15:37.000000000 +0200
  17286. +++ linux-4.4.62/kernel/locking/rtmutex.c 2017-04-18 17:38:08.210649865 +0200
  17287. @@ -7,6 +7,11 @@
  17288. * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
  17289. * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
  17290. * Copyright (C) 2006 Esben Nielsen
  17291. + * Adaptive Spinlocks:
  17292. + * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
  17293. + * and Peter Morreale,
  17294. + * Adaptive Spinlocks simplification:
  17295. + * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com>
  17296. *
  17297. * See Documentation/locking/rt-mutex-design.txt for details.
  17298. */
  17299. @@ -16,6 +21,7 @@
  17300. #include <linux/sched/rt.h>
  17301. #include <linux/sched/deadline.h>
  17302. #include <linux/timer.h>
  17303. +#include <linux/ww_mutex.h>
  17304. #include "rtmutex_common.h"
  17305. @@ -133,6 +139,12 @@
  17306. WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
  17307. }
  17308. +static int rt_mutex_real_waiter(struct rt_mutex_waiter *waiter)
  17309. +{
  17310. + return waiter && waiter != PI_WAKEUP_INPROGRESS &&
  17311. + waiter != PI_REQUEUE_INPROGRESS;
  17312. +}
  17313. +
  17314. /*
  17315. * We can speed up the acquire/release, if there's no debugging state to be
  17316. * set up.
  17317. @@ -163,13 +175,14 @@
  17318. * 2) Drop lock->wait_lock
  17319. * 3) Try to unlock the lock with cmpxchg
  17320. */
  17321. -static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
  17322. +static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
  17323. + unsigned long flags)
  17324. __releases(lock->wait_lock)
  17325. {
  17326. struct task_struct *owner = rt_mutex_owner(lock);
  17327. clear_rt_mutex_waiters(lock);
  17328. - raw_spin_unlock(&lock->wait_lock);
  17329. + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  17330. /*
  17331. * If a new waiter comes in between the unlock and the cmpxchg
  17332. * we have two situations:
  17333. @@ -211,11 +224,12 @@
  17334. /*
  17335. * Simple slow path only version: lock->owner is protected by lock->wait_lock.
  17336. */
  17337. -static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
  17338. +static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
  17339. + unsigned long flags)
  17340. __releases(lock->wait_lock)
  17341. {
  17342. lock->owner = NULL;
  17343. - raw_spin_unlock(&lock->wait_lock);
  17344. + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  17345. return true;
  17346. }
  17347. #endif
  17348. @@ -412,6 +426,14 @@
  17349. return debug_rt_mutex_detect_deadlock(waiter, chwalk);
  17350. }
  17351. +static void rt_mutex_wake_waiter(struct rt_mutex_waiter *waiter)
  17352. +{
  17353. + if (waiter->savestate)
  17354. + wake_up_lock_sleeper(waiter->task);
  17355. + else
  17356. + wake_up_process(waiter->task);
  17357. +}
  17358. +
  17359. /*
  17360. * Max number of times we'll walk the boosting chain:
  17361. */
  17362. @@ -419,7 +441,8 @@
  17363. static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
  17364. {
  17365. - return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
  17366. + return rt_mutex_real_waiter(p->pi_blocked_on) ?
  17367. + p->pi_blocked_on->lock : NULL;
  17368. }
  17369. /*
  17370. @@ -497,7 +520,6 @@
  17371. int ret = 0, depth = 0;
  17372. struct rt_mutex *lock;
  17373. bool detect_deadlock;
  17374. - unsigned long flags;
  17375. bool requeue = true;
  17376. detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk);
  17377. @@ -540,7 +562,7 @@
  17378. /*
  17379. * [1] Task cannot go away as we did a get_task() before !
  17380. */
  17381. - raw_spin_lock_irqsave(&task->pi_lock, flags);
  17382. + raw_spin_lock_irq(&task->pi_lock);
  17383. /*
  17384. * [2] Get the waiter on which @task is blocked on.
  17385. @@ -556,7 +578,7 @@
  17386. * reached or the state of the chain has changed while we
  17387. * dropped the locks.
  17388. */
  17389. - if (!waiter)
  17390. + if (!rt_mutex_real_waiter(waiter))
  17391. goto out_unlock_pi;
  17392. /*
  17393. @@ -624,7 +646,7 @@
  17394. * operations.
  17395. */
  17396. if (!raw_spin_trylock(&lock->wait_lock)) {
  17397. - raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  17398. + raw_spin_unlock_irq(&task->pi_lock);
  17399. cpu_relax();
  17400. goto retry;
  17401. }
  17402. @@ -655,7 +677,7 @@
  17403. /*
  17404. * No requeue[7] here. Just release @task [8]
  17405. */
  17406. - raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  17407. + raw_spin_unlock(&task->pi_lock);
  17408. put_task_struct(task);
  17409. /*
  17410. @@ -663,14 +685,14 @@
  17411. * If there is no owner of the lock, end of chain.
  17412. */
  17413. if (!rt_mutex_owner(lock)) {
  17414. - raw_spin_unlock(&lock->wait_lock);
  17415. + raw_spin_unlock_irq(&lock->wait_lock);
  17416. return 0;
  17417. }
  17418. /* [10] Grab the next task, i.e. owner of @lock */
  17419. task = rt_mutex_owner(lock);
  17420. get_task_struct(task);
  17421. - raw_spin_lock_irqsave(&task->pi_lock, flags);
  17422. + raw_spin_lock(&task->pi_lock);
  17423. /*
  17424. * No requeue [11] here. We just do deadlock detection.
  17425. @@ -685,8 +707,8 @@
  17426. top_waiter = rt_mutex_top_waiter(lock);
  17427. /* [13] Drop locks */
  17428. - raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  17429. - raw_spin_unlock(&lock->wait_lock);
  17430. + raw_spin_unlock(&task->pi_lock);
  17431. + raw_spin_unlock_irq(&lock->wait_lock);
  17432. /* If owner is not blocked, end of chain. */
  17433. if (!next_lock)
  17434. @@ -707,7 +729,7 @@
  17435. rt_mutex_enqueue(lock, waiter);
  17436. /* [8] Release the task */
  17437. - raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  17438. + raw_spin_unlock(&task->pi_lock);
  17439. put_task_struct(task);
  17440. /*
  17441. @@ -718,21 +740,24 @@
  17442. * follow here. This is the end of the chain we are walking.
  17443. */
  17444. if (!rt_mutex_owner(lock)) {
  17445. + struct rt_mutex_waiter *lock_top_waiter;
  17446. +
  17447. /*
  17448. * If the requeue [7] above changed the top waiter,
  17449. * then we need to wake the new top waiter up to try
  17450. * to get the lock.
  17451. */
  17452. - if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
  17453. - wake_up_process(rt_mutex_top_waiter(lock)->task);
  17454. - raw_spin_unlock(&lock->wait_lock);
  17455. + lock_top_waiter = rt_mutex_top_waiter(lock);
  17456. + if (prerequeue_top_waiter != lock_top_waiter)
  17457. + rt_mutex_wake_waiter(lock_top_waiter);
  17458. + raw_spin_unlock_irq(&lock->wait_lock);
  17459. return 0;
  17460. }
  17461. /* [10] Grab the next task, i.e. the owner of @lock */
  17462. task = rt_mutex_owner(lock);
  17463. get_task_struct(task);
  17464. - raw_spin_lock_irqsave(&task->pi_lock, flags);
  17465. + raw_spin_lock(&task->pi_lock);
  17466. /* [11] requeue the pi waiters if necessary */
  17467. if (waiter == rt_mutex_top_waiter(lock)) {
  17468. @@ -786,8 +811,8 @@
  17469. top_waiter = rt_mutex_top_waiter(lock);
  17470. /* [13] Drop the locks */
  17471. - raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  17472. - raw_spin_unlock(&lock->wait_lock);
  17473. + raw_spin_unlock(&task->pi_lock);
  17474. + raw_spin_unlock_irq(&lock->wait_lock);
  17475. /*
  17476. * Make the actual exit decisions [12], based on the stored
  17477. @@ -810,28 +835,46 @@
  17478. goto again;
  17479. out_unlock_pi:
  17480. - raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  17481. + raw_spin_unlock_irq(&task->pi_lock);
  17482. out_put_task:
  17483. put_task_struct(task);
  17484. return ret;
  17485. }
  17486. +
  17487. +#define STEAL_NORMAL 0
  17488. +#define STEAL_LATERAL 1
  17489. +
  17490. +/*
  17491. + * Note that RT tasks are excluded from lateral-steals to prevent the
  17492. + * introduction of an unbounded latency
  17493. + */
  17494. +static inline int lock_is_stealable(struct task_struct *task,
  17495. + struct task_struct *pendowner, int mode)
  17496. +{
  17497. + if (mode == STEAL_NORMAL || rt_task(task)) {
  17498. + if (task->prio >= pendowner->prio)
  17499. + return 0;
  17500. + } else if (task->prio > pendowner->prio)
  17501. + return 0;
  17502. + return 1;
  17503. +}
  17504. +
  17505. /*
  17506. * Try to take an rt-mutex
  17507. *
  17508. - * Must be called with lock->wait_lock held.
  17509. + * Must be called with lock->wait_lock held and interrupts disabled
  17510. *
  17511. * @lock: The lock to be acquired.
  17512. * @task: The task which wants to acquire the lock
  17513. * @waiter: The waiter that is queued to the lock's wait tree if the
  17514. * callsite called task_blocked_on_lock(), otherwise NULL
  17515. */
  17516. -static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
  17517. - struct rt_mutex_waiter *waiter)
  17518. +static int __try_to_take_rt_mutex(struct rt_mutex *lock,
  17519. + struct task_struct *task,
  17520. + struct rt_mutex_waiter *waiter, int mode)
  17521. {
  17522. - unsigned long flags;
  17523. -
  17524. /*
  17525. * Before testing whether we can acquire @lock, we set the
  17526. * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
  17527. @@ -867,8 +910,10 @@
  17528. * If waiter is not the highest priority waiter of
  17529. * @lock, give up.
  17530. */
  17531. - if (waiter != rt_mutex_top_waiter(lock))
  17532. + if (waiter != rt_mutex_top_waiter(lock)) {
  17533. + /* XXX lock_is_stealable() ? */
  17534. return 0;
  17535. + }
  17536. /*
  17537. * We can acquire the lock. Remove the waiter from the
  17538. @@ -886,14 +931,10 @@
  17539. * not need to be dequeued.
  17540. */
  17541. if (rt_mutex_has_waiters(lock)) {
  17542. - /*
  17543. - * If @task->prio is greater than or equal to
  17544. - * the top waiter priority (kernel view),
  17545. - * @task lost.
  17546. - */
  17547. - if (task->prio >= rt_mutex_top_waiter(lock)->prio)
  17548. - return 0;
  17549. + struct task_struct *pown = rt_mutex_top_waiter(lock)->task;
  17550. + if (task != pown && !lock_is_stealable(task, pown, mode))
  17551. + return 0;
  17552. /*
  17553. * The current top waiter stays enqueued. We
  17554. * don't have to change anything in the lock
  17555. @@ -916,7 +957,7 @@
  17556. * case, but conditionals are more expensive than a redundant
  17557. * store.
  17558. */
  17559. - raw_spin_lock_irqsave(&task->pi_lock, flags);
  17560. + raw_spin_lock(&task->pi_lock);
  17561. task->pi_blocked_on = NULL;
  17562. /*
  17563. * Finish the lock acquisition. @task is the new owner. If
  17564. @@ -925,7 +966,7 @@
  17565. */
  17566. if (rt_mutex_has_waiters(lock))
  17567. rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
  17568. - raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  17569. + raw_spin_unlock(&task->pi_lock);
  17570. takeit:
  17571. /* We got the lock. */
  17572. @@ -942,12 +983,444 @@
  17573. return 1;
  17574. }
  17575. +#ifdef CONFIG_PREEMPT_RT_FULL
  17576. +/*
  17577. + * preemptible spin_lock functions:
  17578. + */
  17579. +static inline void rt_spin_lock_fastlock(struct rt_mutex *lock,
  17580. + void (*slowfn)(struct rt_mutex *lock,
  17581. + bool mg_off),
  17582. + bool do_mig_dis)
  17583. +{
  17584. + might_sleep_no_state_check();
  17585. +
  17586. + if (do_mig_dis)
  17587. + migrate_disable();
  17588. +
  17589. + if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
  17590. + rt_mutex_deadlock_account_lock(lock, current);
  17591. + else
  17592. + slowfn(lock, do_mig_dis);
  17593. +}
  17594. +
  17595. +static inline int rt_spin_lock_fastunlock(struct rt_mutex *lock,
  17596. + int (*slowfn)(struct rt_mutex *lock))
  17597. +{
  17598. + if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) {
  17599. + rt_mutex_deadlock_account_unlock(current);
  17600. + return 0;
  17601. + }
  17602. + return slowfn(lock);
  17603. +}
  17604. +#ifdef CONFIG_SMP
  17605. +/*
  17606. + * Note that owner is a speculative pointer and dereferencing relies
  17607. + * on rcu_read_lock() and the check against the lock owner.
  17608. + */
  17609. +static int adaptive_wait(struct rt_mutex *lock,
  17610. + struct task_struct *owner)
  17611. +{
  17612. + int res = 0;
  17613. +
  17614. + rcu_read_lock();
  17615. + for (;;) {
  17616. + if (owner != rt_mutex_owner(lock))
  17617. + break;
  17618. + /*
  17619. + * Ensure that owner->on_cpu is dereferenced _after_
  17620. + * checking the above to be valid.
  17621. + */
  17622. + barrier();
  17623. + if (!owner->on_cpu) {
  17624. + res = 1;
  17625. + break;
  17626. + }
  17627. + cpu_relax();
  17628. + }
  17629. + rcu_read_unlock();
  17630. + return res;
  17631. +}
  17632. +#else
  17633. +static int adaptive_wait(struct rt_mutex *lock,
  17634. + struct task_struct *orig_owner)
  17635. +{
  17636. + return 1;
  17637. +}
  17638. +#endif
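adaptive_wait() spins only while the lock owner is still executing on another CPU; once the owner is preempted or the lock changes hands, the waiter stops spinning and lets the slow path schedule() instead. A self-contained user-space sketch of that spin-or-sleep decision with C11 atomics (fake_owner, fake_lock and adaptive_wait_sketch are made-up stand-ins for the task and owner state):

#include <sched.h>
#include <stdatomic.h>
#include <stdbool.h>

struct fake_owner { _Atomic bool on_cpu; };               /* task_struct::on_cpu stand-in */
struct fake_lock  { _Atomic(struct fake_owner *) owner; };

/* Spin while @owner still holds @lock and is running; nonzero means
 * "stop spinning and block" because the owner went off-CPU. */
static int adaptive_wait_sketch(struct fake_lock *lock, struct fake_owner *owner)
{
    for (;;) {
        if (atomic_load(&lock->owner) != owner)
            return 0;                    /* owner changed: retry the lock */
        if (!atomic_load(&owner->on_cpu))
            return 1;                    /* owner preempted: go to sleep  */
        sched_yield();                   /* stand-in for cpu_relax()      */
    }
}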
  17639. +
  17640. +static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
  17641. + struct rt_mutex_waiter *waiter,
  17642. + struct task_struct *task,
  17643. + enum rtmutex_chainwalk chwalk);
  17644. +/*
  17645. + * Slow path lock function spin_lock style: this variant is very
  17646. + * careful not to miss any non-lock wakeups.
  17647. + *
  17648. + * We store the current state under p->pi_lock in p->saved_state and
  17649. + * the try_to_wake_up() code handles this accordingly.
  17650. + */
  17651. +static void noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock,
  17652. + bool mg_off)
  17653. +{
  17654. + struct task_struct *lock_owner, *self = current;
  17655. + struct rt_mutex_waiter waiter, *top_waiter;
  17656. + unsigned long flags;
  17657. + int ret;
  17658. +
  17659. + rt_mutex_init_waiter(&waiter, true);
  17660. +
  17661. + raw_spin_lock_irqsave(&lock->wait_lock, flags);
  17662. +
  17663. + if (__try_to_take_rt_mutex(lock, self, NULL, STEAL_LATERAL)) {
  17664. + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  17665. + return;
  17666. + }
  17667. +
  17668. + BUG_ON(rt_mutex_owner(lock) == self);
  17669. +
  17670. + /*
  17671. + * We save whatever state the task is in and we'll restore it
  17672. + * after acquiring the lock taking real wakeups into account
  17673. + * as well. We are serialized via pi_lock against wakeups. See
  17674. + * try_to_wake_up().
  17675. + */
  17676. + raw_spin_lock(&self->pi_lock);
  17677. + self->saved_state = self->state;
  17678. + __set_current_state_no_track(TASK_UNINTERRUPTIBLE);
  17679. + raw_spin_unlock(&self->pi_lock);
  17680. +
  17681. + ret = task_blocks_on_rt_mutex(lock, &waiter, self, RT_MUTEX_MIN_CHAINWALK);
  17682. + BUG_ON(ret);
  17683. +
  17684. + for (;;) {
  17685. + /* Try to acquire the lock again. */
  17686. + if (__try_to_take_rt_mutex(lock, self, &waiter, STEAL_LATERAL))
  17687. + break;
  17688. +
  17689. + top_waiter = rt_mutex_top_waiter(lock);
  17690. + lock_owner = rt_mutex_owner(lock);
  17691. +
  17692. + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  17693. +
  17694. + debug_rt_mutex_print_deadlock(&waiter);
  17695. +
  17696. + if (top_waiter != &waiter || adaptive_wait(lock, lock_owner)) {
  17697. + if (mg_off)
  17698. + migrate_enable();
  17699. + schedule();
  17700. + if (mg_off)
  17701. + migrate_disable();
  17702. + }
  17703. +
  17704. + raw_spin_lock_irqsave(&lock->wait_lock, flags);
  17705. +
  17706. + raw_spin_lock(&self->pi_lock);
  17707. + __set_current_state_no_track(TASK_UNINTERRUPTIBLE);
  17708. + raw_spin_unlock(&self->pi_lock);
  17709. + }
  17710. +
  17711. + /*
  17712. + * Restore the task state to current->saved_state. We set it
  17713. + * to the original state above and the try_to_wake_up() code
  17714. + * has possibly updated it when a real (non-rtmutex) wakeup
  17715. + * happened while we were blocked. Clear saved_state so
  17716. + * try_to_wake_up() does not get confused.
  17717. + */
  17718. + raw_spin_lock(&self->pi_lock);
  17719. + __set_current_state_no_track(self->saved_state);
  17720. + self->saved_state = TASK_RUNNING;
  17721. + raw_spin_unlock(&self->pi_lock);
  17722. +
  17723. + /*
  17724. + * try_to_take_rt_mutex() sets the waiter bit
  17725. + * unconditionally. We might have to fix that up:
  17726. + */
  17727. + fixup_rt_mutex_waiters(lock);
  17728. +
  17729. + BUG_ON(rt_mutex_has_waiters(lock) && &waiter == rt_mutex_top_waiter(lock));
  17730. + BUG_ON(!RB_EMPTY_NODE(&waiter.tree_entry));
  17731. +
  17732. + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  17733. +
  17734. + debug_rt_mutex_free_waiter(&waiter);
  17735. +}
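The slow path above parks the task with the saved_state trick: the original task state is stashed in saved_state, the task blocks UNINTERRUPTIBLE for the lock only, and the original state is restored afterwards so a real (non-rtmutex) wakeup that arrived in between is not lost. A minimal model of just that save/block/restore sequence (fake_task and the enum values are illustrative, not the kernel's types):

enum task_state_sketch { RUNNING_SKETCH, INTERRUPTIBLE_SKETCH, UNINTERRUPTIBLE_SKETCH };

struct fake_task {
    enum task_state_sketch state;       /* what try_to_wake_up() inspects   */
    enum task_state_sketch saved_state; /* what the lock slow path stashed  */
};

/* Shape of the sequence in rt_spin_lock_slowlock(): save, block, restore. */
static void slowlock_state_model(struct fake_task *t)
{
    t->saved_state = t->state;             /* e.g. INTERRUPTIBLE_SKETCH   */
    t->state = UNINTERRUPTIBLE_SKETCH;     /* block for the lock only     */

    /* ... schedule() until the rtmutex is acquired ... */

    t->state = t->saved_state;             /* a real wakeup may have set RUNNING */
    t->saved_state = RUNNING_SKETCH;       /* keep later wakeups from being confused */
}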
  17736. +
  17737. +static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
  17738. + struct wake_q_head *wake_sleeper_q,
  17739. + struct rt_mutex *lock);
  17740. +/*
  17741. + * Slow path to release a rt_mutex spin_lock style
  17742. + */
  17743. +static int noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock)
  17744. +{
  17745. + unsigned long flags;
  17746. + WAKE_Q(wake_q);
  17747. + WAKE_Q(wake_sleeper_q);
  17748. +
  17749. + raw_spin_lock_irqsave(&lock->wait_lock, flags);
  17750. +
  17751. + debug_rt_mutex_unlock(lock);
  17752. +
  17753. + rt_mutex_deadlock_account_unlock(current);
  17754. +
  17755. + if (!rt_mutex_has_waiters(lock)) {
  17756. + lock->owner = NULL;
  17757. + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  17758. + return 0;
  17759. + }
  17760. +
  17761. + mark_wakeup_next_waiter(&wake_q, &wake_sleeper_q, lock);
  17762. +
  17763. + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  17764. + wake_up_q(&wake_q);
  17765. + wake_up_q_sleeper(&wake_sleeper_q);
  17766. +
  17767. + /* Undo pi boosting when necessary */
  17768. + rt_mutex_adjust_prio(current);
  17769. + return 0;
  17770. +}
  17771. +
  17772. +static int noinline __sched rt_spin_lock_slowunlock_no_deboost(struct rt_mutex *lock)
  17773. +{
  17774. + unsigned long flags;
  17775. + WAKE_Q(wake_q);
  17776. + WAKE_Q(wake_sleeper_q);
  17777. +
  17778. + raw_spin_lock_irqsave(&lock->wait_lock, flags);
  17779. +
  17780. + debug_rt_mutex_unlock(lock);
  17781. +
  17782. + rt_mutex_deadlock_account_unlock(current);
  17783. +
  17784. + if (!rt_mutex_has_waiters(lock)) {
  17785. + lock->owner = NULL;
  17786. + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  17787. + return 0;
  17788. + }
  17789. +
  17790. + mark_wakeup_next_waiter(&wake_q, &wake_sleeper_q, lock);
  17791. +
  17792. + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  17793. + wake_up_q(&wake_q);
  17794. + wake_up_q_sleeper(&wake_sleeper_q);
  17795. + return 1;
  17796. +}
  17797. +
  17798. +void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock)
  17799. +{
  17800. + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, false);
  17801. + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
  17802. +}
  17803. +EXPORT_SYMBOL(rt_spin_lock__no_mg);
  17804. +
  17805. +void __lockfunc rt_spin_lock(spinlock_t *lock)
  17806. +{
  17807. + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, true);
  17808. + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
  17809. +}
  17810. +EXPORT_SYMBOL(rt_spin_lock);
  17811. +
  17812. +void __lockfunc __rt_spin_lock(struct rt_mutex *lock)
  17813. +{
  17814. + rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock, true);
  17815. +}
  17816. +EXPORT_SYMBOL(__rt_spin_lock);
  17817. +
  17818. +void __lockfunc __rt_spin_lock__no_mg(struct rt_mutex *lock)
  17819. +{
  17820. + rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock, false);
  17821. +}
  17822. +EXPORT_SYMBOL(__rt_spin_lock__no_mg);
  17823. +
  17824. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  17825. +void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass)
  17826. +{
  17827. + spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
  17828. + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, true);
  17829. +}
  17830. +EXPORT_SYMBOL(rt_spin_lock_nested);
  17831. +#endif
  17832. +
  17833. +void __lockfunc rt_spin_unlock__no_mg(spinlock_t *lock)
  17834. +{
  17835. + /* NOTE: we always pass in '1' for nested, for simplicity */
  17836. + spin_release(&lock->dep_map, 1, _RET_IP_);
  17837. + rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock);
  17838. +}
  17839. +EXPORT_SYMBOL(rt_spin_unlock__no_mg);
  17840. +
  17841. +void __lockfunc rt_spin_unlock(spinlock_t *lock)
  17842. +{
  17843. + /* NOTE: we always pass in '1' for nested, for simplicity */
  17844. + spin_release(&lock->dep_map, 1, _RET_IP_);
  17845. + rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock);
  17846. + migrate_enable();
  17847. +}
  17848. +EXPORT_SYMBOL(rt_spin_unlock);
  17849. +
  17850. +int __lockfunc rt_spin_unlock_no_deboost(spinlock_t *lock)
  17851. +{
  17852. + int ret;
  17853. +
  17854. + /* NOTE: we always pass in '1' for nested, for simplicity */
  17855. + spin_release(&lock->dep_map, 1, _RET_IP_);
  17856. + ret = rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock_no_deboost);
  17857. + migrate_enable();
  17858. + return ret;
  17859. +}
  17860. +
  17861. +void __lockfunc __rt_spin_unlock(struct rt_mutex *lock)
  17862. +{
  17863. + rt_spin_lock_fastunlock(lock, rt_spin_lock_slowunlock);
  17864. +}
  17865. +EXPORT_SYMBOL(__rt_spin_unlock);
  17866. +
  17867. +/*
  17868. + * Wait for the lock to get unlocked: instead of polling for an unlock
  17869. + * (like raw spinlocks do), we lock and unlock, to force the kernel to
  17870. + * schedule if there's contention:
  17871. + */
  17872. +void __lockfunc rt_spin_unlock_wait(spinlock_t *lock)
  17873. +{
  17874. + spin_lock(lock);
  17875. + spin_unlock(lock);
  17876. +}
  17877. +EXPORT_SYMBOL(rt_spin_unlock_wait);
  17878. +
  17879. +int __lockfunc __rt_spin_trylock(struct rt_mutex *lock)
  17880. +{
  17881. + return rt_mutex_trylock(lock);
  17882. +}
  17883. +
  17884. +int __lockfunc rt_spin_trylock__no_mg(spinlock_t *lock)
  17885. +{
  17886. + int ret;
  17887. +
  17888. + ret = rt_mutex_trylock(&lock->lock);
  17889. + if (ret)
  17890. + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
  17891. + return ret;
  17892. +}
  17893. +EXPORT_SYMBOL(rt_spin_trylock__no_mg);
  17894. +
  17895. +int __lockfunc rt_spin_trylock(spinlock_t *lock)
  17896. +{
  17897. + int ret;
  17898. +
  17899. + migrate_disable();
  17900. + ret = rt_mutex_trylock(&lock->lock);
  17901. + if (ret)
  17902. + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
  17903. + else
  17904. + migrate_enable();
  17905. + return ret;
  17906. +}
  17907. +EXPORT_SYMBOL(rt_spin_trylock);
  17908. +
  17909. +int __lockfunc rt_spin_trylock_bh(spinlock_t *lock)
  17910. +{
  17911. + int ret;
  17912. +
  17913. + local_bh_disable();
  17914. + ret = rt_mutex_trylock(&lock->lock);
  17915. + if (ret) {
  17916. + migrate_disable();
  17917. + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
  17918. + } else
  17919. + local_bh_enable();
  17920. + return ret;
  17921. +}
  17922. +EXPORT_SYMBOL(rt_spin_trylock_bh);
  17923. +
  17924. +int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags)
  17925. +{
  17926. + int ret;
  17927. +
  17928. + *flags = 0;
  17929. + ret = rt_mutex_trylock(&lock->lock);
  17930. + if (ret) {
  17931. + migrate_disable();
  17932. + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
  17933. + }
  17934. + return ret;
  17935. +}
  17936. +EXPORT_SYMBOL(rt_spin_trylock_irqsave);
  17937. +
  17938. +int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock)
  17939. +{
  17940. + /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */
  17941. + if (atomic_add_unless(atomic, -1, 1))
  17942. + return 0;
  17943. + rt_spin_lock(lock);
  17944. + if (atomic_dec_and_test(atomic))
  17945. + return 1;
  17946. + rt_spin_unlock(lock);
  17947. + return 0;
  17948. +}
  17949. +EXPORT_SYMBOL(atomic_dec_and_spin_lock);
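atomic_dec_and_spin_lock() follows the atomic_dec_and_lock() convention: the common put path decrements without touching the lock, and the lock is only taken (and returned held) when the counter is about to reach zero. A self-contained user-space analogue with C11 atomics and a pthread mutex (dec_and_lock_sketch is an illustrative name, not the kernel function):

#include <pthread.h>
#include <stdatomic.h>

/* Returns 1 with *lock held iff this call dropped the counter to zero. */
static int dec_and_lock_sketch(atomic_int *cnt, pthread_mutex_t *lock)
{
    int old = atomic_load(cnt);

    /* Fast path: decrement lock-free while the count cannot reach zero. */
    while (old > 1) {
        if (atomic_compare_exchange_weak(cnt, &old, old - 1))
            return 0;
    }

    pthread_mutex_lock(lock);
    if (atomic_fetch_sub(cnt, 1) == 1)
        return 1;                        /* last reference: keep the lock */
    pthread_mutex_unlock(lock);
    return 0;
}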
  17950. +
  17951. + void
  17952. +__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key)
  17953. +{
  17954. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  17955. + /*
  17956. + * Make sure we are not reinitializing a held lock:
  17957. + */
  17958. + debug_check_no_locks_freed((void *)lock, sizeof(*lock));
  17959. + lockdep_init_map(&lock->dep_map, name, key, 0);
  17960. +#endif
  17961. +}
  17962. +EXPORT_SYMBOL(__rt_spin_lock_init);
  17963. +
  17964. +#endif /* PREEMPT_RT_FULL */
  17965. +
  17966. +#ifdef CONFIG_PREEMPT_RT_FULL
  17967. + static inline int __sched
  17968. +__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx)
  17969. +{
  17970. + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock);
  17971. + struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx);
  17972. +
  17973. + if (!hold_ctx)
  17974. + return 0;
  17975. +
  17976. + if (unlikely(ctx == hold_ctx))
  17977. + return -EALREADY;
  17978. +
  17979. + if (ctx->stamp - hold_ctx->stamp <= LONG_MAX &&
  17980. + (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) {
  17981. +#ifdef CONFIG_DEBUG_MUTEXES
  17982. + DEBUG_LOCKS_WARN_ON(ctx->contending_lock);
  17983. + ctx->contending_lock = ww;
  17984. +#endif
  17985. + return -EDEADLK;
  17986. + }
  17987. +
  17988. + return 0;
  17989. +}
  17990. +#else
  17991. + static inline int __sched
  17992. +__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx)
  17993. +{
  17994. + BUG();
  17995. + return 0;
  17996. +}
  17997. +
  17998. +#endif
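__mutex_lock_check_stamp() encodes the ww_mutex ordering rule: the context already holding the lock wins if it is older (smaller stamp, with a pointer tie-break for equal stamps), and a younger contender gets -EDEADLK so it can back off and release its other locks. A stand-alone sketch of just that decision (acquire_ctx_sketch and check_stamp_sketch are made-up names mirroring the logic above):

#include <errno.h>
#include <limits.h>
#include <stdint.h>

struct acquire_ctx_sketch { unsigned long stamp; };

/* 0: keep waiting, -EALREADY: we already hold it, -EDEADLK: back off. */
static int check_stamp_sketch(struct acquire_ctx_sketch *ctx,
                              struct acquire_ctx_sketch *hold_ctx)
{
    if (!hold_ctx)
        return 0;
    if (ctx == hold_ctx)
        return -EALREADY;
    /* wrap-safe "ctx is younger than hold_ctx", pointer as tie-break */
    if (ctx->stamp - hold_ctx->stamp <= LONG_MAX &&
        (ctx->stamp != hold_ctx->stamp ||
         (uintptr_t)ctx > (uintptr_t)hold_ctx))
        return -EDEADLK;
    return 0;
}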
  17999. +
  18000. +static inline int
  18001. +try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
  18002. + struct rt_mutex_waiter *waiter)
  18003. +{
  18004. + return __try_to_take_rt_mutex(lock, task, waiter, STEAL_NORMAL);
  18005. +}
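The mode argument selects the stealing rule: sleeping-lock callers go through this STEAL_NORMAL wrapper, where only a strictly higher-priority task may take the lock ahead of the top waiter, while the rt spin_lock slow path passes STEAL_LATERAL so a task of equal priority may also steal. A rough sketch of the comparison lock_is_stealable() is expected to make (kernel priorities are inverted, a lower number means higher priority; the *_SKETCH names are illustrative):

enum steal_mode_sketch { STEAL_NORMAL_SKETCH, STEAL_LATERAL_SKETCH };

/* Kernel convention: a lower numeric priority value is a higher priority. */
static int lock_is_stealable_sketch(int task_prio, int top_waiter_prio,
                                    enum steal_mode_sketch mode)
{
    if (mode == STEAL_LATERAL_SKETCH)
        return task_prio <= top_waiter_prio;  /* equal priority may steal */
    return task_prio < top_waiter_prio;       /* must be strictly higher  */
}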
  18006. +
  18007. /*
  18008. * Task blocks on lock.
  18009. *
  18010. * Prepare waiter and propagate pi chain
  18011. *
  18012. - * This must be called with lock->wait_lock held.
  18013. + * This must be called with lock->wait_lock held and interrupts disabled
  18014. */
  18015. static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
  18016. struct rt_mutex_waiter *waiter,
  18017. @@ -958,7 +1431,6 @@
  18018. struct rt_mutex_waiter *top_waiter = waiter;
  18019. struct rt_mutex *next_lock;
  18020. int chain_walk = 0, res;
  18021. - unsigned long flags;
  18022. /*
  18023. * Early deadlock detection. We really don't want the task to
  18024. @@ -972,7 +1444,24 @@
  18025. if (owner == task)
  18026. return -EDEADLK;
  18027. - raw_spin_lock_irqsave(&task->pi_lock, flags);
  18028. + raw_spin_lock(&task->pi_lock);
  18029. +
  18030. + /*
  18031. + * In the case of futex requeue PI, this will be a proxy
  18032. + * lock. The task will wake unaware that it is enqueued on
  18033. + * this lock. Avoid blocking on two locks and corrupting
  18034. + * pi_blocked_on via the PI_WAKEUP_INPROGRESS
  18035. + * flag. futex_wait_requeue_pi() sets this when it wakes up
  18036. + * before requeue (due to a signal or timeout). Do not enqueue
  18037. + * the task if PI_WAKEUP_INPROGRESS is set.
  18038. + */
  18039. + if (task != current && task->pi_blocked_on == PI_WAKEUP_INPROGRESS) {
  18040. + raw_spin_unlock(&task->pi_lock);
  18041. + return -EAGAIN;
  18042. + }
  18043. +
  18044. + BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on));
  18045. +
  18046. __rt_mutex_adjust_prio(task);
  18047. waiter->task = task;
  18048. waiter->lock = lock;
  18049. @@ -985,18 +1474,18 @@
  18050. task->pi_blocked_on = waiter;
  18051. - raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  18052. + raw_spin_unlock(&task->pi_lock);
  18053. if (!owner)
  18054. return 0;
  18055. - raw_spin_lock_irqsave(&owner->pi_lock, flags);
  18056. + raw_spin_lock(&owner->pi_lock);
  18057. if (waiter == rt_mutex_top_waiter(lock)) {
  18058. rt_mutex_dequeue_pi(owner, top_waiter);
  18059. rt_mutex_enqueue_pi(owner, waiter);
  18060. __rt_mutex_adjust_prio(owner);
  18061. - if (owner->pi_blocked_on)
  18062. + if (rt_mutex_real_waiter(owner->pi_blocked_on))
  18063. chain_walk = 1;
  18064. } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
  18065. chain_walk = 1;
  18066. @@ -1005,7 +1494,7 @@
  18067. /* Store the lock on which owner is blocked or NULL */
  18068. next_lock = task_blocked_on_lock(owner);
  18069. - raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
  18070. + raw_spin_unlock(&owner->pi_lock);
  18071. /*
  18072. * Even if full deadlock detection is on, if the owner is not
  18073. * blocked itself, we can avoid finding this out in the chain
  18074. @@ -1021,12 +1510,12 @@
  18075. */
  18076. get_task_struct(owner);
  18077. - raw_spin_unlock(&lock->wait_lock);
  18078. + raw_spin_unlock_irq(&lock->wait_lock);
  18079. res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
  18080. next_lock, waiter, task);
  18081. - raw_spin_lock(&lock->wait_lock);
  18082. + raw_spin_lock_irq(&lock->wait_lock);
  18083. return res;
  18084. }
  18085. @@ -1035,15 +1524,15 @@
  18086. * Remove the top waiter from the current tasks pi waiter tree and
  18087. * queue it up.
  18088. *
  18089. - * Called with lock->wait_lock held.
  18090. + * Called with lock->wait_lock held and interrupts disabled.
  18091. */
  18092. static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
  18093. + struct wake_q_head *wake_sleeper_q,
  18094. struct rt_mutex *lock)
  18095. {
  18096. struct rt_mutex_waiter *waiter;
  18097. - unsigned long flags;
  18098. - raw_spin_lock_irqsave(&current->pi_lock, flags);
  18099. + raw_spin_lock(&current->pi_lock);
  18100. waiter = rt_mutex_top_waiter(lock);
  18101. @@ -1065,15 +1554,18 @@
  18102. */
  18103. lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
  18104. - raw_spin_unlock_irqrestore(&current->pi_lock, flags);
  18105. + raw_spin_unlock(&current->pi_lock);
  18106. - wake_q_add(wake_q, waiter->task);
  18107. + if (waiter->savestate)
  18108. + wake_q_add(wake_sleeper_q, waiter->task);
  18109. + else
  18110. + wake_q_add(wake_q, waiter->task);
  18111. }
  18112. /*
  18113. * Remove a waiter from a lock and give up
  18114. *
  18115. - * Must be called with lock->wait_lock held and
  18116. + * Must be called with lock->wait_lock held and interrupts disabled. It must
  18117. * have just failed to try_to_take_rt_mutex().
  18118. */
  18119. static void remove_waiter(struct rt_mutex *lock,
  18120. @@ -1081,13 +1573,12 @@
  18121. {
  18122. bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
  18123. struct task_struct *owner = rt_mutex_owner(lock);
  18124. - struct rt_mutex *next_lock;
  18125. - unsigned long flags;
  18126. + struct rt_mutex *next_lock = NULL;
  18127. - raw_spin_lock_irqsave(&current->pi_lock, flags);
  18128. + raw_spin_lock(&current->pi_lock);
  18129. rt_mutex_dequeue(lock, waiter);
  18130. current->pi_blocked_on = NULL;
  18131. - raw_spin_unlock_irqrestore(&current->pi_lock, flags);
  18132. + raw_spin_unlock(&current->pi_lock);
  18133. /*
  18134. * Only update priority if the waiter was the highest priority
  18135. @@ -1096,7 +1587,7 @@
  18136. if (!owner || !is_top_waiter)
  18137. return;
  18138. - raw_spin_lock_irqsave(&owner->pi_lock, flags);
  18139. + raw_spin_lock(&owner->pi_lock);
  18140. rt_mutex_dequeue_pi(owner, waiter);
  18141. @@ -1106,9 +1597,10 @@
  18142. __rt_mutex_adjust_prio(owner);
  18143. /* Store the lock on which owner is blocked or NULL */
  18144. - next_lock = task_blocked_on_lock(owner);
  18145. + if (rt_mutex_real_waiter(owner->pi_blocked_on))
  18146. + next_lock = task_blocked_on_lock(owner);
  18147. - raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
  18148. + raw_spin_unlock(&owner->pi_lock);
  18149. /*
  18150. * Don't walk the chain, if the owner task is not blocked
  18151. @@ -1120,12 +1612,12 @@
  18152. /* gets dropped in rt_mutex_adjust_prio_chain()! */
  18153. get_task_struct(owner);
  18154. - raw_spin_unlock(&lock->wait_lock);
  18155. + raw_spin_unlock_irq(&lock->wait_lock);
  18156. rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
  18157. next_lock, NULL, current);
  18158. - raw_spin_lock(&lock->wait_lock);
  18159. + raw_spin_lock_irq(&lock->wait_lock);
  18160. }
  18161. /*
  18162. @@ -1142,17 +1634,17 @@
  18163. raw_spin_lock_irqsave(&task->pi_lock, flags);
  18164. waiter = task->pi_blocked_on;
  18165. - if (!waiter || (waiter->prio == task->prio &&
  18166. + if (!rt_mutex_real_waiter(waiter) || (waiter->prio == task->prio &&
  18167. !dl_prio(task->prio))) {
  18168. raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  18169. return;
  18170. }
  18171. next_lock = waiter->lock;
  18172. - raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  18173. /* gets dropped in rt_mutex_adjust_prio_chain()! */
  18174. get_task_struct(task);
  18175. + raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  18176. rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL,
  18177. next_lock, NULL, task);
  18178. }
  18179. @@ -1161,16 +1653,17 @@
  18180. * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
  18181. * @lock: the rt_mutex to take
  18182. * @state: the state the task should block in (TASK_INTERRUPTIBLE
  18183. - * or TASK_UNINTERRUPTIBLE)
  18184. + * or TASK_UNINTERRUPTIBLE)
  18185. * @timeout: the pre-initialized and started timer, or NULL for none
  18186. * @waiter: the pre-initialized rt_mutex_waiter
  18187. *
  18188. - * lock->wait_lock must be held by the caller.
  18189. + * Must be called with lock->wait_lock held and interrupts disabled
  18190. */
  18191. static int __sched
  18192. __rt_mutex_slowlock(struct rt_mutex *lock, int state,
  18193. struct hrtimer_sleeper *timeout,
  18194. - struct rt_mutex_waiter *waiter)
  18195. + struct rt_mutex_waiter *waiter,
  18196. + struct ww_acquire_ctx *ww_ctx)
  18197. {
  18198. int ret = 0;
  18199. @@ -1193,13 +1686,19 @@
  18200. break;
  18201. }
  18202. - raw_spin_unlock(&lock->wait_lock);
  18203. + if (ww_ctx && ww_ctx->acquired > 0) {
  18204. + ret = __mutex_lock_check_stamp(lock, ww_ctx);
  18205. + if (ret)
  18206. + break;
  18207. + }
  18208. +
  18209. + raw_spin_unlock_irq(&lock->wait_lock);
  18210. debug_rt_mutex_print_deadlock(waiter);
  18211. schedule();
  18212. - raw_spin_lock(&lock->wait_lock);
  18213. + raw_spin_lock_irq(&lock->wait_lock);
  18214. set_current_state(state);
  18215. }
  18216. @@ -1227,26 +1726,112 @@
  18217. }
  18218. }
  18219. +static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww,
  18220. + struct ww_acquire_ctx *ww_ctx)
  18221. +{
  18222. +#ifdef CONFIG_DEBUG_MUTEXES
  18223. + /*
  18224. + * If this WARN_ON triggers, you used ww_mutex_lock to acquire,
  18225. + * but released with a normal mutex_unlock in this call.
  18226. + *
  18227. + * This should never happen, always use ww_mutex_unlock.
  18228. + */
  18229. + DEBUG_LOCKS_WARN_ON(ww->ctx);
  18230. +
  18231. + /*
  18232. + * Not quite done after calling ww_acquire_done() ?
  18233. + */
  18234. + DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire);
  18235. +
  18236. + if (ww_ctx->contending_lock) {
  18237. + /*
  18238. + * After -EDEADLK you tried to
  18239. + * acquire a different ww_mutex? Bad!
  18240. + */
  18241. + DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww);
  18242. +
  18243. + /*
  18244. + * You called ww_mutex_lock after receiving -EDEADLK,
  18245. + * but 'forgot' to unlock everything else first?
  18246. + */
  18247. + DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0);
  18248. + ww_ctx->contending_lock = NULL;
  18249. + }
  18250. +
  18251. + /*
  18252. + * Naughty, using a different class will lead to undefined behavior!
  18253. + */
  18254. + DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class);
  18255. +#endif
  18256. + ww_ctx->acquired++;
  18257. +}
  18258. +
  18259. +#ifdef CONFIG_PREEMPT_RT_FULL
  18260. +static void ww_mutex_account_lock(struct rt_mutex *lock,
  18261. + struct ww_acquire_ctx *ww_ctx)
  18262. +{
  18263. + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock);
  18264. + struct rt_mutex_waiter *waiter, *n;
  18265. +
  18266. + /*
  18267. + * This branch gets optimized out for the common case,
  18268. + * and is only important for ww_mutex_lock.
  18269. + */
  18270. + ww_mutex_lock_acquired(ww, ww_ctx);
  18271. + ww->ctx = ww_ctx;
  18272. +
  18273. + /*
  18274. + * Give any possible sleeping processes the chance to wake up,
  18275. + * so they can recheck if they have to back off.
  18276. + */
  18277. + rbtree_postorder_for_each_entry_safe(waiter, n, &lock->waiters,
  18278. + tree_entry) {
  18279. + /* XXX debug rt mutex waiter wakeup */
  18280. +
  18281. + BUG_ON(waiter->lock != lock);
  18282. + rt_mutex_wake_waiter(waiter);
  18283. + }
  18284. +}
  18285. +
  18286. +#else
  18287. +
  18288. +static void ww_mutex_account_lock(struct rt_mutex *lock,
  18289. + struct ww_acquire_ctx *ww_ctx)
  18290. +{
  18291. + BUG();
  18292. +}
  18293. +#endif
  18294. +
  18295. /*
  18296. * Slow path lock function:
  18297. */
  18298. static int __sched
  18299. rt_mutex_slowlock(struct rt_mutex *lock, int state,
  18300. struct hrtimer_sleeper *timeout,
  18301. - enum rtmutex_chainwalk chwalk)
  18302. + enum rtmutex_chainwalk chwalk,
  18303. + struct ww_acquire_ctx *ww_ctx)
  18304. {
  18305. struct rt_mutex_waiter waiter;
  18306. + unsigned long flags;
  18307. int ret = 0;
  18308. - debug_rt_mutex_init_waiter(&waiter);
  18309. - RB_CLEAR_NODE(&waiter.pi_tree_entry);
  18310. - RB_CLEAR_NODE(&waiter.tree_entry);
  18311. + rt_mutex_init_waiter(&waiter, false);
  18312. - raw_spin_lock(&lock->wait_lock);
  18313. + /*
  18314. + * Technically we could use raw_spin_[un]lock_irq() here, but this can
  18315. + * be called in early boot if the cmpxchg() fast path is disabled
  18316. + * (debug, no architecture support). In this case we will acquire the
  18317. + * rtmutex with lock->wait_lock held. But we cannot unconditionally
  18318. + * enable interrupts in that early boot case. So we need to use the
  18319. + * irqsave/restore variants.
  18320. + */
  18321. + raw_spin_lock_irqsave(&lock->wait_lock, flags);
  18322. /* Try to acquire the lock again: */
  18323. if (try_to_take_rt_mutex(lock, current, NULL)) {
  18324. - raw_spin_unlock(&lock->wait_lock);
  18325. + if (ww_ctx)
  18326. + ww_mutex_account_lock(lock, ww_ctx);
  18327. + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  18328. return 0;
  18329. }
  18330. @@ -1260,13 +1845,23 @@
  18331. if (likely(!ret))
  18332. /* sleep on the mutex */
  18333. - ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
  18334. + ret = __rt_mutex_slowlock(lock, state, timeout, &waiter,
  18335. + ww_ctx);
  18336. + else if (ww_ctx) {
  18337. + /* ww_mutex received EDEADLK, let it become EALREADY */
  18338. + ret = __mutex_lock_check_stamp(lock, ww_ctx);
  18339. + BUG_ON(!ret);
  18340. + }
  18341. if (unlikely(ret)) {
  18342. __set_current_state(TASK_RUNNING);
  18343. if (rt_mutex_has_waiters(lock))
  18344. remove_waiter(lock, &waiter);
  18345. - rt_mutex_handle_deadlock(ret, chwalk, &waiter);
  18346. + /* ww_mutex want to report EDEADLK/EALREADY, let them */
  18347. + if (!ww_ctx)
  18348. + rt_mutex_handle_deadlock(ret, chwalk, &waiter);
  18349. + } else if (ww_ctx) {
  18350. + ww_mutex_account_lock(lock, ww_ctx);
  18351. }
  18352. /*
  18353. @@ -1275,7 +1870,7 @@
  18354. */
  18355. fixup_rt_mutex_waiters(lock);
  18356. - raw_spin_unlock(&lock->wait_lock);
  18357. + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  18358. /* Remove pending timer: */
  18359. if (unlikely(timeout))
  18360. @@ -1291,6 +1886,7 @@
  18361. */
  18362. static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
  18363. {
  18364. + unsigned long flags;
  18365. int ret;
  18366. /*
  18367. @@ -1302,10 +1898,10 @@
  18368. return 0;
  18369. /*
  18370. - * The mutex has currently no owner. Lock the wait lock and
  18371. - * try to acquire the lock.
  18372. + * The mutex has currently no owner. Lock the wait lock and try to
  18373. + * acquire the lock. We use irqsave here to support early boot calls.
  18374. */
  18375. - raw_spin_lock(&lock->wait_lock);
  18376. + raw_spin_lock_irqsave(&lock->wait_lock, flags);
  18377. ret = try_to_take_rt_mutex(lock, current, NULL);
  18378. @@ -1315,7 +1911,7 @@
  18379. */
  18380. fixup_rt_mutex_waiters(lock);
  18381. - raw_spin_unlock(&lock->wait_lock);
  18382. + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  18383. return ret;
  18384. }
  18385. @@ -1325,9 +1921,13 @@
  18386. * Return whether the current task needs to undo a potential priority boosting.
  18387. */
  18388. static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
  18389. - struct wake_q_head *wake_q)
  18390. + struct wake_q_head *wake_q,
  18391. + struct wake_q_head *wake_sleeper_q)
  18392. {
  18393. - raw_spin_lock(&lock->wait_lock);
  18394. + unsigned long flags;
  18395. +
  18396. + /* irqsave required to support early boot calls */
  18397. + raw_spin_lock_irqsave(&lock->wait_lock, flags);
  18398. debug_rt_mutex_unlock(lock);
  18399. @@ -1366,10 +1966,10 @@
  18400. */
  18401. while (!rt_mutex_has_waiters(lock)) {
  18402. /* Drops lock->wait_lock ! */
  18403. - if (unlock_rt_mutex_safe(lock) == true)
  18404. + if (unlock_rt_mutex_safe(lock, flags) == true)
  18405. return false;
  18406. /* Relock the rtmutex and try again */
  18407. - raw_spin_lock(&lock->wait_lock);
  18408. + raw_spin_lock_irqsave(&lock->wait_lock, flags);
  18409. }
  18410. /*
  18411. @@ -1378,9 +1978,9 @@
  18412. *
  18413. * Queue the next waiter for wakeup once we release the wait_lock.
  18414. */
  18415. - mark_wakeup_next_waiter(wake_q, lock);
  18416. + mark_wakeup_next_waiter(wake_q, wake_sleeper_q, lock);
  18417. - raw_spin_unlock(&lock->wait_lock);
  18418. + raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
  18419. /* check PI boosting */
  18420. return true;
  18421. @@ -1394,31 +1994,36 @@
  18422. */
  18423. static inline int
  18424. rt_mutex_fastlock(struct rt_mutex *lock, int state,
  18425. + struct ww_acquire_ctx *ww_ctx,
  18426. int (*slowfn)(struct rt_mutex *lock, int state,
  18427. struct hrtimer_sleeper *timeout,
  18428. - enum rtmutex_chainwalk chwalk))
  18429. + enum rtmutex_chainwalk chwalk,
  18430. + struct ww_acquire_ctx *ww_ctx))
  18431. {
  18432. if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
  18433. rt_mutex_deadlock_account_lock(lock, current);
  18434. return 0;
  18435. } else
  18436. - return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK);
  18437. + return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK,
  18438. + ww_ctx);
  18439. }
  18440. static inline int
  18441. rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
  18442. struct hrtimer_sleeper *timeout,
  18443. enum rtmutex_chainwalk chwalk,
  18444. + struct ww_acquire_ctx *ww_ctx,
  18445. int (*slowfn)(struct rt_mutex *lock, int state,
  18446. struct hrtimer_sleeper *timeout,
  18447. - enum rtmutex_chainwalk chwalk))
  18448. + enum rtmutex_chainwalk chwalk,
  18449. + struct ww_acquire_ctx *ww_ctx))
  18450. {
  18451. if (chwalk == RT_MUTEX_MIN_CHAINWALK &&
  18452. likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
  18453. rt_mutex_deadlock_account_lock(lock, current);
  18454. return 0;
  18455. } else
  18456. - return slowfn(lock, state, timeout, chwalk);
  18457. + return slowfn(lock, state, timeout, chwalk, ww_ctx);
  18458. }
  18459. static inline int
  18460. @@ -1435,17 +2040,20 @@
  18461. static inline void
  18462. rt_mutex_fastunlock(struct rt_mutex *lock,
  18463. bool (*slowfn)(struct rt_mutex *lock,
  18464. - struct wake_q_head *wqh))
  18465. + struct wake_q_head *wqh,
  18466. + struct wake_q_head *wq_sleeper))
  18467. {
  18468. WAKE_Q(wake_q);
  18469. + WAKE_Q(wake_sleeper_q);
  18470. if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) {
  18471. rt_mutex_deadlock_account_unlock(current);
  18472. } else {
  18473. - bool deboost = slowfn(lock, &wake_q);
  18474. + bool deboost = slowfn(lock, &wake_q, &wake_sleeper_q);
  18475. wake_up_q(&wake_q);
  18476. + wake_up_q_sleeper(&wake_sleeper_q);
  18477. /* Undo pi boosting if necessary: */
  18478. if (deboost)
  18479. @@ -1462,7 +2070,7 @@
  18480. {
  18481. might_sleep();
  18482. - rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock);
  18483. + rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, NULL, rt_mutex_slowlock);
  18484. }
  18485. EXPORT_SYMBOL_GPL(rt_mutex_lock);
  18486. @@ -1479,7 +2087,7 @@
  18487. {
  18488. might_sleep();
  18489. - return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock);
  18490. + return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, NULL, rt_mutex_slowlock);
  18491. }
  18492. EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
  18493. @@ -1492,11 +2100,30 @@
  18494. might_sleep();
  18495. return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
  18496. - RT_MUTEX_FULL_CHAINWALK,
  18497. + RT_MUTEX_FULL_CHAINWALK, NULL,
  18498. rt_mutex_slowlock);
  18499. }
  18500. /**
  18501. + * rt_mutex_lock_killable - lock a rt_mutex killable
  18502. + *
  18503. + * @lock: the rt_mutex to be locked
  18504. + * @detect_deadlock: deadlock detection on/off
  18505. + *
  18506. + * Returns:
  18507. + * 0 on success
  18508. + * -EINTR when interrupted by a signal
  18509. + * -EDEADLK when the lock would deadlock (when deadlock detection is on)
  18510. + */
  18511. +int __sched rt_mutex_lock_killable(struct rt_mutex *lock)
  18512. +{
  18513. + might_sleep();
  18514. +
  18515. + return rt_mutex_fastlock(lock, TASK_KILLABLE, NULL, rt_mutex_slowlock);
  18516. +}
  18517. +EXPORT_SYMBOL_GPL(rt_mutex_lock_killable);
  18518. +
  18519. +/**
  18520. * rt_mutex_timed_lock - lock a rt_mutex interruptible
  18521. * the timeout structure is provided
  18522. * by the caller
  18523. @@ -1516,6 +2143,7 @@
  18524. return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
  18525. RT_MUTEX_MIN_CHAINWALK,
  18526. + NULL,
  18527. rt_mutex_slowlock);
  18528. }
  18529. EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
  18530. @@ -1533,7 +2161,11 @@
  18531. */
  18532. int __sched rt_mutex_trylock(struct rt_mutex *lock)
  18533. {
  18534. +#ifdef CONFIG_PREEMPT_RT_FULL
  18535. + if (WARN_ON_ONCE(in_irq() || in_nmi()))
  18536. +#else
  18537. if (WARN_ON(in_irq() || in_nmi() || in_serving_softirq()))
  18538. +#endif
  18539. return 0;
  18540. return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
  18541. @@ -1559,13 +2191,14 @@
  18542. * required or not.
  18543. */
  18544. bool __sched rt_mutex_futex_unlock(struct rt_mutex *lock,
  18545. - struct wake_q_head *wqh)
  18546. + struct wake_q_head *wqh,
  18547. + struct wake_q_head *wq_sleeper)
  18548. {
  18549. if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) {
  18550. rt_mutex_deadlock_account_unlock(current);
  18551. return false;
  18552. }
  18553. - return rt_mutex_slowunlock(lock, wqh);
  18554. + return rt_mutex_slowunlock(lock, wqh, wq_sleeper);
  18555. }
  18556. /**
  18557. @@ -1598,13 +2231,12 @@
  18558. void __rt_mutex_init(struct rt_mutex *lock, const char *name)
  18559. {
  18560. lock->owner = NULL;
  18561. - raw_spin_lock_init(&lock->wait_lock);
  18562. lock->waiters = RB_ROOT;
  18563. lock->waiters_leftmost = NULL;
  18564. debug_rt_mutex_init(lock, name);
  18565. }
  18566. -EXPORT_SYMBOL_GPL(__rt_mutex_init);
  18567. +EXPORT_SYMBOL(__rt_mutex_init);
  18568. /**
  18569. * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
  18570. @@ -1619,7 +2251,7 @@
  18571. void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
  18572. struct task_struct *proxy_owner)
  18573. {
  18574. - __rt_mutex_init(lock, NULL);
  18575. + rt_mutex_init(lock);
  18576. debug_rt_mutex_proxy_lock(lock, proxy_owner);
  18577. rt_mutex_set_owner(lock, proxy_owner);
  18578. rt_mutex_deadlock_account_lock(lock, proxy_owner);
  18579. @@ -1660,13 +2292,42 @@
  18580. {
  18581. int ret;
  18582. - raw_spin_lock(&lock->wait_lock);
  18583. + raw_spin_lock_irq(&lock->wait_lock);
  18584. if (try_to_take_rt_mutex(lock, task, NULL)) {
  18585. - raw_spin_unlock(&lock->wait_lock);
  18586. + raw_spin_unlock_irq(&lock->wait_lock);
  18587. return 1;
  18588. }
  18589. +#ifdef CONFIG_PREEMPT_RT_FULL
  18590. + /*
  18591. + * In PREEMPT_RT there's an added race.
  18592. + * If the task, that we are about to requeue, times out,
  18593. + * it can set the PI_WAKEUP_INPROGRESS. This tells the requeue
  18594. + * to skip this task. But right after the task sets
  18595. + * its pi_blocked_on to PI_WAKEUP_INPROGRESS it can then
  18596. + * block on the spin_lock(&hb->lock), which in RT is an rtmutex.
  18597. + * This will replace the PI_WAKEUP_INPROGRESS with the actual
  18598. + * lock that it blocks on. We *must not* place this task
  18599. + * on this proxy lock in that case.
  18600. + *
  18601. + * To prevent this race, we first take the task's pi_lock
  18602. + * and check if it has updated its pi_blocked_on. If it has,
  18603. + * we assume that it woke up and we return -EAGAIN.
  18604. + * Otherwise, we set the task's pi_blocked_on to
  18605. + * PI_REQUEUE_INPROGRESS, so that if the task is waking up
  18606. + * it will know that we are in the process of requeuing it.
  18607. + */
  18608. + raw_spin_lock(&task->pi_lock);
  18609. + if (task->pi_blocked_on) {
  18610. + raw_spin_unlock(&task->pi_lock);
  18611. + raw_spin_unlock_irq(&lock->wait_lock);
  18612. + return -EAGAIN;
  18613. + }
  18614. + task->pi_blocked_on = PI_REQUEUE_INPROGRESS;
  18615. + raw_spin_unlock(&task->pi_lock);
  18616. +#endif
  18617. +
  18618. /* We enforce deadlock detection for futexes */
  18619. ret = task_blocks_on_rt_mutex(lock, waiter, task,
  18620. RT_MUTEX_FULL_CHAINWALK);
  18621. @@ -1681,10 +2342,10 @@
  18622. ret = 0;
  18623. }
  18624. - if (unlikely(ret))
  18625. + if (ret && rt_mutex_has_waiters(lock))
  18626. remove_waiter(lock, waiter);
  18627. - raw_spin_unlock(&lock->wait_lock);
  18628. + raw_spin_unlock_irq(&lock->wait_lock);
  18629. debug_rt_mutex_print_deadlock(waiter);
  18630. @@ -1732,12 +2393,12 @@
  18631. {
  18632. int ret;
  18633. - raw_spin_lock(&lock->wait_lock);
  18634. + raw_spin_lock_irq(&lock->wait_lock);
  18635. set_current_state(TASK_INTERRUPTIBLE);
  18636. /* sleep on the mutex */
  18637. - ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
  18638. + ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, NULL);
  18639. if (unlikely(ret))
  18640. remove_waiter(lock, waiter);
  18641. @@ -1748,7 +2409,93 @@
  18642. */
  18643. fixup_rt_mutex_waiters(lock);
  18644. - raw_spin_unlock(&lock->wait_lock);
  18645. + raw_spin_unlock_irq(&lock->wait_lock);
  18646. +
  18647. + return ret;
  18648. +}
  18649. +
  18650. +static inline int
  18651. +ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
  18652. +{
  18653. +#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
  18654. + unsigned tmp;
  18655. +
  18656. + if (ctx->deadlock_inject_countdown-- == 0) {
  18657. + tmp = ctx->deadlock_inject_interval;
  18658. + if (tmp > UINT_MAX/4)
  18659. + tmp = UINT_MAX;
  18660. + else
  18661. + tmp = tmp*2 + tmp + tmp/2;
  18662. +
  18663. + ctx->deadlock_inject_interval = tmp;
  18664. + ctx->deadlock_inject_countdown = tmp;
  18665. + ctx->contending_lock = lock;
  18666. +
  18667. + ww_mutex_unlock(lock);
  18668. +
  18669. + return -EDEADLK;
  18670. + }
  18671. +#endif
  18672. +
  18673. + return 0;
  18674. +}
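With CONFIG_DEBUG_WW_MUTEX_SLOWPATH, an artificial -EDEADLK is injected every deadlock_inject_countdown acquisitions, and the interval then grows by roughly 3.5x (tmp*2 + tmp + tmp/2), saturating at UINT_MAX before it can overflow. A tiny sketch of that growth step (illustrative name, not kernel API):

#include <limits.h>

/* One growth step of the injection interval: ~3.5x, saturating. */
static unsigned int next_inject_interval_sketch(unsigned int tmp)
{
    if (tmp > UINT_MAX / 4)
        return UINT_MAX;
    return tmp * 2 + tmp + tmp / 2;
}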
  18675. +
  18676. +#ifdef CONFIG_PREEMPT_RT_FULL
  18677. +int __sched
  18678. +__ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx)
  18679. +{
  18680. + int ret;
  18681. +
  18682. + might_sleep();
  18683. +
  18684. + mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map, _RET_IP_);
  18685. + ret = rt_mutex_slowlock(&lock->base.lock, TASK_INTERRUPTIBLE, NULL, 0, ww_ctx);
  18686. + if (ret)
  18687. + mutex_release(&lock->base.dep_map, 1, _RET_IP_);
  18688. + else if (!ret && ww_ctx->acquired > 1)
  18689. + return ww_mutex_deadlock_injection(lock, ww_ctx);
  18690. return ret;
  18691. }
  18692. +EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible);
  18693. +
  18694. +int __sched
  18695. +__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx)
  18696. +{
  18697. + int ret;
  18698. +
  18699. + might_sleep();
  18700. +
  18701. + mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map, _RET_IP_);
  18702. + ret = rt_mutex_slowlock(&lock->base.lock, TASK_UNINTERRUPTIBLE, NULL, 0, ww_ctx);
  18703. + if (ret)
  18704. + mutex_release(&lock->base.dep_map, 1, _RET_IP_);
  18705. + else if (!ret && ww_ctx->acquired > 1)
  18706. + return ww_mutex_deadlock_injection(lock, ww_ctx);
  18707. +
  18708. + return ret;
  18709. +}
  18710. +EXPORT_SYMBOL_GPL(__ww_mutex_lock);
  18711. +
  18712. +void __sched ww_mutex_unlock(struct ww_mutex *lock)
  18713. +{
  18714. + int nest = !!lock->ctx;
  18715. +
  18716. + /*
  18717. + * The unlocking fastpath is the 0->1 transition from 'locked'
  18718. + * into 'unlocked' state:
  18719. + */
  18720. + if (nest) {
  18721. +#ifdef CONFIG_DEBUG_MUTEXES
  18722. + DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired);
  18723. +#endif
  18724. + if (lock->ctx->acquired > 0)
  18725. + lock->ctx->acquired--;
  18726. + lock->ctx = NULL;
  18727. + }
  18728. +
  18729. + mutex_release(&lock->base.dep_map, nest, _RET_IP_);
  18730. + rt_mutex_unlock(&lock->base.lock);
  18731. +}
  18732. +EXPORT_SYMBOL(ww_mutex_unlock);
  18733. +#endif
  18734. diff -Nur linux-4.4.62.orig/kernel/locking/rtmutex_common.h linux-4.4.62/kernel/locking/rtmutex_common.h
  18735. --- linux-4.4.62.orig/kernel/locking/rtmutex_common.h 2017-04-18 07:15:37.000000000 +0200
  18736. +++ linux-4.4.62/kernel/locking/rtmutex_common.h 2017-04-18 17:38:08.210649865 +0200
  18737. @@ -27,6 +27,7 @@
  18738. struct rb_node pi_tree_entry;
  18739. struct task_struct *task;
  18740. struct rt_mutex *lock;
  18741. + bool savestate;
  18742. #ifdef CONFIG_DEBUG_RT_MUTEXES
  18743. unsigned long ip;
  18744. struct pid *deadlock_task_pid;
  18745. @@ -98,6 +99,9 @@
  18746. /*
  18747. * PI-futex support (proxy locking functions, etc.):
  18748. */
  18749. +#define PI_WAKEUP_INPROGRESS ((struct rt_mutex_waiter *) 1)
  18750. +#define PI_REQUEUE_INPROGRESS ((struct rt_mutex_waiter *) 2)
  18751. +
  18752. extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock);
  18753. extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
  18754. struct task_struct *proxy_owner);
  18755. @@ -111,7 +115,8 @@
  18756. struct rt_mutex_waiter *waiter);
  18757. extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to);
  18758. extern bool rt_mutex_futex_unlock(struct rt_mutex *lock,
  18759. - struct wake_q_head *wqh);
  18760. + struct wake_q_head *wqh,
  18761. + struct wake_q_head *wq_sleeper);
  18762. extern void rt_mutex_adjust_prio(struct task_struct *task);
  18763. #ifdef CONFIG_DEBUG_RT_MUTEXES
  18764. @@ -120,4 +125,14 @@
  18765. # include "rtmutex.h"
  18766. #endif
  18767. +static inline void
  18768. +rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savestate)
  18769. +{
  18770. + debug_rt_mutex_init_waiter(waiter);
  18771. + waiter->task = NULL;
  18772. + waiter->savestate = savestate;
  18773. + RB_CLEAR_NODE(&waiter->pi_tree_entry);
  18774. + RB_CLEAR_NODE(&waiter->tree_entry);
  18775. +}
  18776. +
  18777. #endif
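PI_WAKEUP_INPROGRESS and PI_REQUEUE_INPROGRESS are small non-NULL sentinel values stored in pi_blocked_on while a futex wakeup or requeue is in flight; PI-chain code must treat them as "no real waiter". The rt_mutex_real_waiter() checks used in the rtmutex.c hunks above presumably amount to a predicate of this shape (waiter_sketch and real_waiter_sketch are illustrative; the actual helper is defined in an earlier hunk not shown here):

struct waiter_sketch;

#define PI_WAKEUP_INPROGRESS_SKETCH   ((struct waiter_sketch *) 1)
#define PI_REQUEUE_INPROGRESS_SKETCH  ((struct waiter_sketch *) 2)

/* Anything other than NULL and the two sentinels is a real, enqueued waiter. */
static inline int real_waiter_sketch(struct waiter_sketch *w)
{
    return w && w != PI_WAKEUP_INPROGRESS_SKETCH &&
           w != PI_REQUEUE_INPROGRESS_SKETCH;
}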
  18778. diff -Nur linux-4.4.62.orig/kernel/locking/spinlock.c linux-4.4.62/kernel/locking/spinlock.c
  18779. --- linux-4.4.62.orig/kernel/locking/spinlock.c 2017-04-18 07:15:37.000000000 +0200
  18780. +++ linux-4.4.62/kernel/locking/spinlock.c 2017-04-18 17:38:08.210649865 +0200
  18781. @@ -124,8 +124,11 @@
  18782. * __[spin|read|write]_lock_bh()
  18783. */
  18784. BUILD_LOCK_OPS(spin, raw_spinlock);
  18785. +
  18786. +#ifndef CONFIG_PREEMPT_RT_FULL
  18787. BUILD_LOCK_OPS(read, rwlock);
  18788. BUILD_LOCK_OPS(write, rwlock);
  18789. +#endif
  18790. #endif
  18791. @@ -209,6 +212,8 @@
  18792. EXPORT_SYMBOL(_raw_spin_unlock_bh);
  18793. #endif
  18794. +#ifndef CONFIG_PREEMPT_RT_FULL
  18795. +
  18796. #ifndef CONFIG_INLINE_READ_TRYLOCK
  18797. int __lockfunc _raw_read_trylock(rwlock_t *lock)
  18798. {
  18799. @@ -353,6 +358,8 @@
  18800. EXPORT_SYMBOL(_raw_write_unlock_bh);
  18801. #endif
  18802. +#endif /* !PREEMPT_RT_FULL */
  18803. +
  18804. #ifdef CONFIG_DEBUG_LOCK_ALLOC
  18805. void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass)
  18806. diff -Nur linux-4.4.62.orig/kernel/locking/spinlock_debug.c linux-4.4.62/kernel/locking/spinlock_debug.c
  18807. --- linux-4.4.62.orig/kernel/locking/spinlock_debug.c 2017-04-18 07:15:37.000000000 +0200
  18808. +++ linux-4.4.62/kernel/locking/spinlock_debug.c 2017-04-18 17:38:08.210649865 +0200
  18809. @@ -31,6 +31,7 @@
  18810. EXPORT_SYMBOL(__raw_spin_lock_init);
  18811. +#ifndef CONFIG_PREEMPT_RT_FULL
  18812. void __rwlock_init(rwlock_t *lock, const char *name,
  18813. struct lock_class_key *key)
  18814. {
  18815. @@ -48,6 +49,7 @@
  18816. }
  18817. EXPORT_SYMBOL(__rwlock_init);
  18818. +#endif
  18819. static void spin_dump(raw_spinlock_t *lock, const char *msg)
  18820. {
  18821. @@ -159,6 +161,7 @@
  18822. arch_spin_unlock(&lock->raw_lock);
  18823. }
  18824. +#ifndef CONFIG_PREEMPT_RT_FULL
  18825. static void rwlock_bug(rwlock_t *lock, const char *msg)
  18826. {
  18827. if (!debug_locks_off())
  18828. @@ -300,3 +303,5 @@
  18829. debug_write_unlock(lock);
  18830. arch_write_unlock(&lock->raw_lock);
  18831. }
  18832. +
  18833. +#endif
  18834. diff -Nur linux-4.4.62.orig/kernel/module.c linux-4.4.62/kernel/module.c
  18835. --- linux-4.4.62.orig/kernel/module.c 2017-04-18 07:15:37.000000000 +0200
  18836. +++ linux-4.4.62/kernel/module.c 2017-04-18 17:38:08.210649865 +0200
  18837. @@ -682,16 +682,7 @@
  18838. memcpy(per_cpu_ptr(mod->percpu, cpu), from, size);
  18839. }
  18840. -/**
  18841. - * is_module_percpu_address - test whether address is from module static percpu
  18842. - * @addr: address to test
  18843. - *
  18844. - * Test whether @addr belongs to module static percpu area.
  18845. - *
  18846. - * RETURNS:
  18847. - * %true if @addr is from module static percpu area
  18848. - */
  18849. -bool is_module_percpu_address(unsigned long addr)
  18850. +bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr)
  18851. {
  18852. struct module *mod;
  18853. unsigned int cpu;
  18854. @@ -705,9 +696,11 @@
  18855. continue;
  18856. for_each_possible_cpu(cpu) {
  18857. void *start = per_cpu_ptr(mod->percpu, cpu);
  18858. + void *va = (void *)addr;
  18859. - if ((void *)addr >= start &&
  18860. - (void *)addr < start + mod->percpu_size) {
  18861. + if (va >= start && va < start + mod->percpu_size) {
  18862. + if (can_addr)
  18863. + *can_addr = (unsigned long) (va - start);
  18864. preempt_enable();
  18865. return true;
  18866. }
  18867. @@ -718,6 +711,20 @@
  18868. return false;
  18869. }
  18870. +/**
  18871. + * is_module_percpu_address - test whether address is from module static percpu
  18872. + * @addr: address to test
  18873. + *
  18874. + * Test whether @addr belongs to module static percpu area.
  18875. + *
  18876. + * RETURNS:
  18877. + * %true if @addr is from module static percpu area
  18878. + */
  18879. +bool is_module_percpu_address(unsigned long addr)
  18880. +{
  18881. + return __is_module_percpu_address(addr, NULL);
  18882. +}
  18883. +
  18884. #else /* ... !CONFIG_SMP */
  18885. static inline void __percpu *mod_percpu(struct module *mod)
  18886. @@ -748,6 +755,11 @@
  18887. {
  18888. return false;
  18889. }
  18890. +
  18891. +bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr)
  18892. +{
  18893. + return false;
  18894. +}
  18895. #endif /* CONFIG_SMP */
  18896. diff -Nur linux-4.4.62.orig/kernel/panic.c linux-4.4.62/kernel/panic.c
  18897. --- linux-4.4.62.orig/kernel/panic.c 2017-04-18 07:15:37.000000000 +0200
  18898. +++ linux-4.4.62/kernel/panic.c 2017-04-18 17:38:08.210649865 +0200
  18899. @@ -61,6 +61,37 @@
  18900. cpu_relax();
  18901. }
  18902. +/*
  18903. + * Stop ourselves in NMI context if another CPU has already panicked. Arch code
  18904. + * may override this to prepare for crash dumping, e.g. save regs info.
  18905. + */
  18906. +void __weak nmi_panic_self_stop(struct pt_regs *regs)
  18907. +{
  18908. + panic_smp_self_stop();
  18909. +}
  18910. +
  18911. +atomic_t panic_cpu = ATOMIC_INIT(PANIC_CPU_INVALID);
  18912. +
  18913. +/*
  18914. + * A variant of panic() called from NMI context. We return if we've already
  18915. + * panicked on this CPU. If another CPU already panicked, loop in
  18916. + * nmi_panic_self_stop() which can provide architecture dependent code such
  18917. + * as saving register state for crash dump.
  18918. + */
  18919. +void nmi_panic(struct pt_regs *regs, const char *msg)
  18920. +{
  18921. + int old_cpu, cpu;
  18922. +
  18923. + cpu = raw_smp_processor_id();
  18924. + old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, cpu);
  18925. +
  18926. + if (old_cpu == PANIC_CPU_INVALID)
  18927. + panic("%s", msg);
  18928. + else if (old_cpu != cpu)
  18929. + nmi_panic_self_stop(regs);
  18930. +}
  18931. +EXPORT_SYMBOL(nmi_panic);
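panic() and nmi_panic() now elect a single panicking CPU with an atomic compare-and-exchange on panic_cpu instead of a spinlock trylock, which also lets a CPU that already won the election via nmi_panic() proceed when it later reaches panic() proper. A self-contained C11 sketch of that election (CPU_INVALID_SKETCH, panic_cpu_sketch and claim_panic_sketch are made-up names):

#include <stdatomic.h>
#include <stdbool.h>

#define CPU_INVALID_SKETCH (-1)

static atomic_int panic_cpu_sketch = ATOMIC_VAR_INIT(CPU_INVALID_SKETCH);

/* True if @cpu may carry on panicking: it won the race, or it already had. */
static bool claim_panic_sketch(int cpu)
{
    int old = CPU_INVALID_SKETCH;

    if (atomic_compare_exchange_strong(&panic_cpu_sketch, &old, cpu))
        return true;           /* first CPU to reach panic             */
    return old == cpu;         /* e.g. nmi_panic() already claimed it  */
}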
  18932. +
  18933. /**
  18934. * panic - halt the system
  18935. * @fmt: The text string to print
  18936. @@ -71,17 +102,17 @@
  18937. */
  18938. void panic(const char *fmt, ...)
  18939. {
  18940. - static DEFINE_SPINLOCK(panic_lock);
  18941. static char buf[1024];
  18942. va_list args;
  18943. long i, i_next = 0;
  18944. int state = 0;
  18945. + int old_cpu, this_cpu;
  18946. /*
  18947. * Disable local interrupts. This will prevent panic_smp_self_stop
  18948. * from deadlocking the first cpu that invokes the panic, since
  18949. * there is nothing to prevent an interrupt handler (that runs
  18950. - * after the panic_lock is acquired) from invoking panic again.
  18951. + * after setting panic_cpu) from invoking panic() again.
  18952. */
  18953. local_irq_disable();
  18954. @@ -94,8 +125,16 @@
  18955. * multiple parallel invocations of panic, all other CPUs either
  18956. * stop themself or will wait until they are stopped by the 1st CPU
  18957. * with smp_send_stop().
  18958. + *
  18959. + * `old_cpu == PANIC_CPU_INVALID' means this is the 1st CPU which
  18960. + * comes here, so go ahead.
  18961. + * `old_cpu == this_cpu' means we came from nmi_panic() which sets
  18962. + * panic_cpu to this CPU. In this case, this is also the 1st CPU.
  18963. */
  18964. - if (!spin_trylock(&panic_lock))
  18965. + this_cpu = raw_smp_processor_id();
  18966. + old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu);
  18967. +
  18968. + if (old_cpu != PANIC_CPU_INVALID && old_cpu != this_cpu)
  18969. panic_smp_self_stop();
  18970. console_verbose();
  18971. @@ -400,9 +439,11 @@
  18972. static int init_oops_id(void)
  18973. {
  18974. +#ifndef CONFIG_PREEMPT_RT_FULL
  18975. if (!oops_id)
  18976. get_random_bytes(&oops_id, sizeof(oops_id));
  18977. else
  18978. +#endif
  18979. oops_id++;
  18980. return 0;
  18981. diff -Nur linux-4.4.62.orig/kernel/power/hibernate.c linux-4.4.62/kernel/power/hibernate.c
  18982. --- linux-4.4.62.orig/kernel/power/hibernate.c 2017-04-18 07:15:37.000000000 +0200
  18983. +++ linux-4.4.62/kernel/power/hibernate.c 2017-04-18 17:38:08.210649865 +0200
  18984. @@ -285,6 +285,8 @@
  18985. local_irq_disable();
  18986. + system_state = SYSTEM_SUSPEND;
  18987. +
  18988. error = syscore_suspend();
  18989. if (error) {
  18990. printk(KERN_ERR "PM: Some system devices failed to power down, "
  18991. @@ -314,6 +316,7 @@
  18992. syscore_resume();
  18993. Enable_irqs:
  18994. + system_state = SYSTEM_RUNNING;
  18995. local_irq_enable();
  18996. Enable_cpus:
  18997. @@ -438,6 +441,7 @@
  18998. goto Enable_cpus;
  18999. local_irq_disable();
  19000. + system_state = SYSTEM_SUSPEND;
  19001. error = syscore_suspend();
  19002. if (error)
  19003. @@ -471,6 +475,7 @@
  19004. syscore_resume();
  19005. Enable_irqs:
  19006. + system_state = SYSTEM_RUNNING;
  19007. local_irq_enable();
  19008. Enable_cpus:
  19009. @@ -556,6 +561,7 @@
  19010. goto Enable_cpus;
  19011. local_irq_disable();
  19012. + system_state = SYSTEM_SUSPEND;
  19013. syscore_suspend();
  19014. if (pm_wakeup_pending()) {
  19015. error = -EAGAIN;
  19016. @@ -568,6 +574,7 @@
  19017. Power_up:
  19018. syscore_resume();
  19019. + system_state = SYSTEM_RUNNING;
  19020. local_irq_enable();
  19021. Enable_cpus:
  19022. @@ -642,6 +649,10 @@
  19023. cpu_relax();
  19024. }
  19025. +#ifndef CONFIG_SUSPEND
  19026. +bool pm_in_action;
  19027. +#endif
  19028. +
  19029. /**
  19030. * hibernate - Carry out system hibernation, including saving the image.
  19031. */
  19032. @@ -654,6 +665,8 @@
  19033. return -EPERM;
  19034. }
  19035. + pm_in_action = true;
  19036. +
  19037. lock_system_sleep();
  19038. /* The snapshot device should not be opened while we're running */
  19039. if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
  19040. @@ -719,6 +732,7 @@
  19041. atomic_inc(&snapshot_device_available);
  19042. Unlock:
  19043. unlock_system_sleep();
  19044. + pm_in_action = false;
  19045. return error;
  19046. }
  19047. diff -Nur linux-4.4.62.orig/kernel/power/suspend.c linux-4.4.62/kernel/power/suspend.c
  19048. --- linux-4.4.62.orig/kernel/power/suspend.c 2017-04-18 07:15:37.000000000 +0200
  19049. +++ linux-4.4.62/kernel/power/suspend.c 2017-04-18 17:38:08.210649865 +0200
  19050. @@ -359,6 +359,8 @@
  19051. arch_suspend_disable_irqs();
  19052. BUG_ON(!irqs_disabled());
  19053. + system_state = SYSTEM_SUSPEND;
  19054. +
  19055. error = syscore_suspend();
  19056. if (!error) {
  19057. *wakeup = pm_wakeup_pending();
  19058. @@ -375,6 +377,8 @@
  19059. syscore_resume();
  19060. }
  19061. + system_state = SYSTEM_RUNNING;
  19062. +
  19063. arch_suspend_enable_irqs();
  19064. BUG_ON(irqs_disabled());
  19065. @@ -518,6 +522,8 @@
  19066. return error;
  19067. }
  19068. +bool pm_in_action;
  19069. +
  19070. /**
  19071. * pm_suspend - Externally visible function for suspending the system.
  19072. * @state: System sleep state to enter.
  19073. @@ -532,6 +538,8 @@
  19074. if (state <= PM_SUSPEND_ON || state >= PM_SUSPEND_MAX)
  19075. return -EINVAL;
  19076. + pm_in_action = true;
  19077. +
  19078. error = enter_state(state);
  19079. if (error) {
  19080. suspend_stats.fail++;
  19081. @@ -539,6 +547,7 @@
  19082. } else {
  19083. suspend_stats.success++;
  19084. }
  19085. + pm_in_action = false;
  19086. return error;
  19087. }
  19088. EXPORT_SYMBOL(pm_suspend);
  19089. diff -Nur linux-4.4.62.orig/kernel/printk/printk.c linux-4.4.62/kernel/printk/printk.c
  19090. --- linux-4.4.62.orig/kernel/printk/printk.c 2017-04-18 07:15:37.000000000 +0200
  19091. +++ linux-4.4.62/kernel/printk/printk.c 2017-04-18 17:38:08.214650020 +0200
  19092. @@ -241,6 +241,65 @@
  19093. */
  19094. static DEFINE_RAW_SPINLOCK(logbuf_lock);
  19095. +#ifdef CONFIG_EARLY_PRINTK
  19096. +struct console *early_console;
  19097. +
  19098. +static void early_vprintk(const char *fmt, va_list ap)
  19099. +{
  19100. + if (early_console) {
  19101. + char buf[512];
  19102. + int n = vscnprintf(buf, sizeof(buf), fmt, ap);
  19103. +
  19104. + early_console->write(early_console, buf, n);
  19105. + }
  19106. +}
  19107. +
  19108. +asmlinkage void early_printk(const char *fmt, ...)
  19109. +{
  19110. + va_list ap;
  19111. +
  19112. + va_start(ap, fmt);
  19113. + early_vprintk(fmt, ap);
  19114. + va_end(ap);
  19115. +}
  19116. +
  19117. +/*
  19118. + * This is independent of any log levels - a global
  19119. + * kill switch that turns off all of printk.
  19120. + *
  19121. + * Used by the NMI watchdog if early-printk is enabled.
  19122. + */
  19123. +static bool __read_mostly printk_killswitch;
  19124. +
  19125. +static int __init force_early_printk_setup(char *str)
  19126. +{
  19127. + printk_killswitch = true;
  19128. + return 0;
  19129. +}
  19130. +early_param("force_early_printk", force_early_printk_setup);
  19131. +
  19132. +void printk_kill(void)
  19133. +{
  19134. + printk_killswitch = true;
  19135. +}
  19136. +
  19137. +#ifdef CONFIG_PRINTK
  19138. +static int forced_early_printk(const char *fmt, va_list ap)
  19139. +{
  19140. + if (!printk_killswitch)
  19141. + return 0;
  19142. + early_vprintk(fmt, ap);
  19143. + return 1;
  19144. +}
  19145. +#endif
  19146. +
  19147. +#else
  19148. +static inline int forced_early_printk(const char *fmt, va_list ap)
  19149. +{
  19150. + return 0;
  19151. +}
  19152. +#endif
  19153. +
  19154. #ifdef CONFIG_PRINTK
  19155. DECLARE_WAIT_QUEUE_HEAD(log_wait);
  19156. /* the next printk record to read by syslog(READ) or /proc/kmsg */
  19157. @@ -1203,6 +1262,7 @@
  19158. {
  19159. char *text;
  19160. int len = 0;
  19161. + int attempts = 0;
  19162. text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
  19163. if (!text)
  19164. @@ -1214,7 +1274,14 @@
  19165. u64 seq;
  19166. u32 idx;
  19167. enum log_flags prev;
  19168. -
  19169. + int num_msg;
  19170. +try_again:
  19171. + attempts++;
  19172. + if (attempts > 10) {
  19173. + len = -EBUSY;
  19174. + goto out;
  19175. + }
  19176. + num_msg = 0;
  19177. if (clear_seq < log_first_seq) {
  19178. /* messages are gone, move to first available one */
  19179. clear_seq = log_first_seq;
  19180. @@ -1235,6 +1302,14 @@
  19181. prev = msg->flags;
  19182. idx = log_next(idx);
  19183. seq++;
  19184. + num_msg++;
  19185. + if (num_msg > 5) {
  19186. + num_msg = 0;
  19187. + raw_spin_unlock_irq(&logbuf_lock);
  19188. + raw_spin_lock_irq(&logbuf_lock);
  19189. + if (clear_seq < log_first_seq)
  19190. + goto try_again;
  19191. + }
  19192. }
  19193. /* move first record forward until length fits into the buffer */
  19194. @@ -1248,6 +1323,14 @@
  19195. prev = msg->flags;
  19196. idx = log_next(idx);
  19197. seq++;
  19198. + num_msg++;
  19199. + if (num_msg > 5) {
  19200. + num_msg = 0;
  19201. + raw_spin_unlock_irq(&logbuf_lock);
  19202. + raw_spin_lock_irq(&logbuf_lock);
  19203. + if (clear_seq < log_first_seq)
  19204. + goto try_again;
  19205. + }
  19206. }
  19207. /* last message fitting into this dump */
  19208. @@ -1288,6 +1371,7 @@
  19209. clear_seq = log_next_seq;
  19210. clear_idx = log_next_idx;
  19211. }
  19212. +out:
  19213. raw_spin_unlock_irq(&logbuf_lock);
  19214. kfree(text);
  19215. @@ -1443,6 +1527,12 @@
  19216. if (!console_drivers)
  19217. return;
  19218. + if (IS_ENABLED(CONFIG_PREEMPT_RT_BASE)) {
  19219. + if (in_irq() || in_nmi())
  19220. + return;
  19221. + }
  19222. +
  19223. + migrate_disable();
  19224. for_each_console(con) {
  19225. if (exclusive_console && con != exclusive_console)
  19226. continue;
  19227. @@ -1458,6 +1548,7 @@
  19228. else
  19229. con->write(con, text, len);
  19230. }
  19231. + migrate_enable();
  19232. }
  19233. /*
  19234. @@ -1518,6 +1609,15 @@
  19235. static int console_trylock_for_printk(void)
  19236. {
  19237. unsigned int cpu = smp_processor_id();
  19238. +#ifdef CONFIG_PREEMPT_RT_FULL
  19239. + int lock = !early_boot_irqs_disabled && (preempt_count() == 0) &&
  19240. + !irqs_disabled();
  19241. +#else
  19242. + int lock = 1;
  19243. +#endif
  19244. +
  19245. + if (!lock)
  19246. + return 0;
  19247. if (!console_trylock())
  19248. return 0;
  19249. @@ -1672,6 +1772,13 @@
  19250. /* cpu currently holding logbuf_lock in this function */
  19251. static unsigned int logbuf_cpu = UINT_MAX;
  19252. + /*
  19253. + * Fall back to early_printk if a debugging subsystem has
  19254. + * killed printk output
  19255. + */
  19256. + if (unlikely(forced_early_printk(fmt, args)))
  19257. + return 1;
  19258. +
  19259. if (level == LOGLEVEL_SCHED) {
  19260. level = LOGLEVEL_DEFAULT;
  19261. in_sched = true;
  19262. @@ -1813,8 +1920,7 @@
  19263. * console_sem which would prevent anyone from printing to
  19264. * console
  19265. */
  19266. - preempt_disable();
  19267. -
  19268. + migrate_disable();
  19269. /*
  19270. * Try to acquire and then immediately release the console
  19271. * semaphore. The release will print out buffers and wake up
  19272. @@ -1822,7 +1928,7 @@
  19273. */
  19274. if (console_trylock_for_printk())
  19275. console_unlock();
  19276. - preempt_enable();
  19277. + migrate_enable();
  19278. lockdep_on();
  19279. }
  19280. @@ -1961,26 +2067,6 @@
  19281. #endif /* CONFIG_PRINTK */
  19282. -#ifdef CONFIG_EARLY_PRINTK
  19283. -struct console *early_console;
  19284. -
  19285. -asmlinkage __visible void early_printk(const char *fmt, ...)
  19286. -{
  19287. - va_list ap;
  19288. - char buf[512];
  19289. - int n;
  19290. -
  19291. - if (!early_console)
  19292. - return;
  19293. -
  19294. - va_start(ap, fmt);
  19295. - n = vscnprintf(buf, sizeof(buf), fmt, ap);
  19296. - va_end(ap);
  19297. -
  19298. - early_console->write(early_console, buf, n);
  19299. -}
  19300. -#endif
  19301. -
  19302. static int __add_preferred_console(char *name, int idx, char *options,
  19303. char *brl_options)
  19304. {
  19305. @@ -2202,11 +2288,16 @@
  19306. goto out;
  19307. len = cont_print_text(text, size);
  19308. +#ifdef CONFIG_PREEMPT_RT_FULL
  19309. + raw_spin_unlock_irqrestore(&logbuf_lock, flags);
  19310. + call_console_drivers(cont.level, NULL, 0, text, len);
  19311. +#else
  19312. raw_spin_unlock(&logbuf_lock);
  19313. stop_critical_timings();
  19314. call_console_drivers(cont.level, NULL, 0, text, len);
  19315. start_critical_timings();
  19316. local_irq_restore(flags);
  19317. +#endif
  19318. return;
  19319. out:
  19320. raw_spin_unlock_irqrestore(&logbuf_lock, flags);
  19321. @@ -2316,13 +2407,17 @@
  19322. console_idx = log_next(console_idx);
  19323. console_seq++;
  19324. console_prev = msg->flags;
  19325. +#ifdef CONFIG_PREEMPT_RT_FULL
  19326. + raw_spin_unlock_irqrestore(&logbuf_lock, flags);
  19327. + call_console_drivers(level, ext_text, ext_len, text, len);
  19328. +#else
  19329. raw_spin_unlock(&logbuf_lock);
  19330. stop_critical_timings(); /* don't trace print latency */
  19331. call_console_drivers(level, ext_text, ext_len, text, len);
  19332. start_critical_timings();
  19333. local_irq_restore(flags);
  19334. -
  19335. +#endif
  19336. if (do_cond_resched)
  19337. cond_resched();
  19338. }
  19339. @@ -2374,6 +2469,11 @@
  19340. {
  19341. struct console *c;
  19342. + if (IS_ENABLED(CONFIG_PREEMPT_RT_BASE)) {
  19343. + if (in_irq() || in_nmi())
  19344. + return;
  19345. + }
  19346. +
  19347. /*
  19348. * console_unblank can no longer be called in interrupt context unless
  19349. * oops_in_progress is set to 1..
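The printk.c changes above move early_printk() next to the log-buffer code and add a global kill switch: once printk_kill() is called (or force_early_printk is given on the command line), vprintk_emit() short-circuits into early_vprintk(), bypassing logbuf_lock and the console semaphore entirely. A rough sketch of how a debug facility such as an NMI watchdog might rely on it; the reporting function is hypothetical, and the printk_kill()/early_printk() declarations are assumed to be made visible via linux/printk.h elsewhere in this patch.

#include <linux/printk.h>

/* Hypothetical hard-lockup report path. */
void my_watchdog_report(int cpu, unsigned long ip)
{
	/*
	 * Stop normal printk: we may be interrupting a logbuf_lock holder,
	 * so only the lock-free early console is safe from here on.
	 */
	printk_kill();
	early_printk("NMI watchdog: CPU%d stuck at %pS\n", cpu, (void *)ip);
}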
  19350. diff -Nur linux-4.4.62.orig/kernel/ptrace.c linux-4.4.62/kernel/ptrace.c
  19351. --- linux-4.4.62.orig/kernel/ptrace.c 2017-04-18 07:15:37.000000000 +0200
  19352. +++ linux-4.4.62/kernel/ptrace.c 2017-04-18 17:38:08.214650020 +0200
  19353. @@ -136,7 +136,14 @@
  19354. spin_lock_irq(&task->sighand->siglock);
  19355. if (task_is_traced(task) && !__fatal_signal_pending(task)) {
  19356. - task->state = __TASK_TRACED;
  19357. + unsigned long flags;
  19358. +
  19359. + raw_spin_lock_irqsave(&task->pi_lock, flags);
  19360. + if (task->state & __TASK_TRACED)
  19361. + task->state = __TASK_TRACED;
  19362. + else
  19363. + task->saved_state = __TASK_TRACED;
  19364. + raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  19365. ret = true;
  19366. }
  19367. spin_unlock_irq(&task->sighand->siglock);
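On PREEMPT_RT a task blocked on a sleeping spinlock parks its original state in task->saved_state, so the ptrace hunk above must write __TASK_TRACED into whichever field currently holds the "real" state, and must do so under pi_lock. The same rule applies to any code that rewrites another task's state on RT; a condensed restatement of the pattern, with the field names used by this patch:

#include <linux/sched.h>

static void mark_task_traced(struct task_struct *task)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&task->pi_lock, flags);
	if (task->state & __TASK_TRACED)
		task->state = __TASK_TRACED;		/* not blocked on a sleeping lock */
	else
		task->saved_state = __TASK_TRACED;	/* restored once the lock is acquired */
	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
}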
  19368. diff -Nur linux-4.4.62.orig/kernel/rcu/rcutorture.c linux-4.4.62/kernel/rcu/rcutorture.c
  19369. --- linux-4.4.62.orig/kernel/rcu/rcutorture.c 2017-04-18 07:15:37.000000000 +0200
  19370. +++ linux-4.4.62/kernel/rcu/rcutorture.c 2017-04-18 17:38:08.214650020 +0200
  19371. @@ -390,6 +390,7 @@
  19372. .name = "rcu"
  19373. };
  19374. +#ifndef CONFIG_PREEMPT_RT_FULL
  19375. /*
  19376. * Definitions for rcu_bh torture testing.
  19377. */
  19378. @@ -429,6 +430,12 @@
  19379. .name = "rcu_bh"
  19380. };
  19381. +#else
  19382. +static struct rcu_torture_ops rcu_bh_ops = {
  19383. + .ttype = INVALID_RCU_FLAVOR,
  19384. +};
  19385. +#endif
  19386. +
  19387. /*
  19388. * Don't even think about trying any of these in real life!!!
  19389. * The names includes "busted", and they really means it!
  19390. diff -Nur linux-4.4.62.orig/kernel/rcu/tree.c linux-4.4.62/kernel/rcu/tree.c
  19391. --- linux-4.4.62.orig/kernel/rcu/tree.c 2017-04-18 07:15:37.000000000 +0200
  19392. +++ linux-4.4.62/kernel/rcu/tree.c 2017-04-18 17:38:08.214650020 +0200
  19393. @@ -56,6 +56,11 @@
  19394. #include <linux/random.h>
  19395. #include <linux/trace_events.h>
  19396. #include <linux/suspend.h>
  19397. +#include <linux/delay.h>
  19398. +#include <linux/gfp.h>
  19399. +#include <linux/oom.h>
  19400. +#include <linux/smpboot.h>
  19401. +#include "../time/tick-internal.h"
  19402. #include "tree.h"
  19403. #include "rcu.h"
  19404. @@ -266,6 +271,19 @@
  19405. }
  19406. }
  19407. +#ifdef CONFIG_PREEMPT_RT_FULL
  19408. +static void rcu_preempt_qs(void);
  19409. +
  19410. +void rcu_bh_qs(void)
  19411. +{
  19412. + unsigned long flags;
  19413. +
  19414. + /* Callers to this function, rcu_preempt_qs(), must disable irqs. */
  19415. + local_irq_save(flags);
  19416. + rcu_preempt_qs();
  19417. + local_irq_restore(flags);
  19418. +}
  19419. +#else
  19420. void rcu_bh_qs(void)
  19421. {
  19422. if (__this_cpu_read(rcu_bh_data.cpu_no_qs.s)) {
  19423. @@ -275,6 +293,7 @@
  19424. __this_cpu_write(rcu_bh_data.cpu_no_qs.b.norm, false);
  19425. }
  19426. }
  19427. +#endif
  19428. static DEFINE_PER_CPU(int, rcu_sched_qs_mask);
  19429. @@ -435,11 +454,13 @@
  19430. /*
  19431. * Return the number of RCU BH batches started thus far for debug & stats.
  19432. */
  19433. +#ifndef CONFIG_PREEMPT_RT_FULL
  19434. unsigned long rcu_batches_started_bh(void)
  19435. {
  19436. return rcu_bh_state.gpnum;
  19437. }
  19438. EXPORT_SYMBOL_GPL(rcu_batches_started_bh);
  19439. +#endif
  19440. /*
  19441. * Return the number of RCU batches completed thus far for debug & stats.
  19442. @@ -459,6 +480,7 @@
  19443. }
  19444. EXPORT_SYMBOL_GPL(rcu_batches_completed_sched);
  19445. +#ifndef CONFIG_PREEMPT_RT_FULL
  19446. /*
  19447. * Return the number of RCU BH batches completed thus far for debug & stats.
  19448. */
  19449. @@ -486,6 +508,13 @@
  19450. }
  19451. EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
  19452. +#else
  19453. +void rcu_force_quiescent_state(void)
  19454. +{
  19455. +}
  19456. +EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
  19457. +#endif
  19458. +
  19459. /*
  19460. * Force a quiescent state for RCU-sched.
  19461. */
  19462. @@ -536,9 +565,11 @@
  19463. case RCU_FLAVOR:
  19464. rsp = rcu_state_p;
  19465. break;
  19466. +#ifndef CONFIG_PREEMPT_RT_FULL
  19467. case RCU_BH_FLAVOR:
  19468. rsp = &rcu_bh_state;
  19469. break;
  19470. +#endif
  19471. case RCU_SCHED_FLAVOR:
  19472. rsp = &rcu_sched_state;
  19473. break;
  19474. @@ -1590,7 +1621,6 @@
  19475. int needmore;
  19476. struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
  19477. - rcu_nocb_gp_cleanup(rsp, rnp);
  19478. rnp->need_future_gp[c & 0x1] = 0;
  19479. needmore = rnp->need_future_gp[(c + 1) & 0x1];
  19480. trace_rcu_future_gp(rnp, rdp, c,
  19481. @@ -1611,7 +1641,7 @@
  19482. !READ_ONCE(rsp->gp_flags) ||
  19483. !rsp->gp_kthread)
  19484. return;
  19485. - wake_up(&rsp->gp_wq);
  19486. + swake_up(&rsp->gp_wq);
  19487. }
  19488. /*
  19489. @@ -1991,6 +2021,7 @@
  19490. int nocb = 0;
  19491. struct rcu_data *rdp;
  19492. struct rcu_node *rnp = rcu_get_root(rsp);
  19493. + struct swait_queue_head *sq;
  19494. WRITE_ONCE(rsp->gp_activity, jiffies);
  19495. raw_spin_lock_irq(&rnp->lock);
  19496. @@ -2029,7 +2060,9 @@
  19497. needgp = __note_gp_changes(rsp, rnp, rdp) || needgp;
  19498. /* smp_mb() provided by prior unlock-lock pair. */
  19499. nocb += rcu_future_gp_cleanup(rsp, rnp);
  19500. + sq = rcu_nocb_gp_get(rnp);
  19501. raw_spin_unlock_irq(&rnp->lock);
  19502. + rcu_nocb_gp_cleanup(sq);
  19503. cond_resched_rcu_qs();
  19504. WRITE_ONCE(rsp->gp_activity, jiffies);
  19505. rcu_gp_slow(rsp, gp_cleanup_delay);
  19506. @@ -2076,7 +2109,7 @@
  19507. READ_ONCE(rsp->gpnum),
  19508. TPS("reqwait"));
  19509. rsp->gp_state = RCU_GP_WAIT_GPS;
  19510. - wait_event_interruptible(rsp->gp_wq,
  19511. + swait_event_interruptible(rsp->gp_wq,
  19512. READ_ONCE(rsp->gp_flags) &
  19513. RCU_GP_FLAG_INIT);
  19514. rsp->gp_state = RCU_GP_DONE_GPS;
  19515. @@ -2106,7 +2139,7 @@
  19516. READ_ONCE(rsp->gpnum),
  19517. TPS("fqswait"));
  19518. rsp->gp_state = RCU_GP_WAIT_FQS;
  19519. - ret = wait_event_interruptible_timeout(rsp->gp_wq,
  19520. + ret = swait_event_interruptible_timeout(rsp->gp_wq,
  19521. rcu_gp_fqs_check_wake(rsp, &gf), j);
  19522. rsp->gp_state = RCU_GP_DOING_FQS;
  19523. /* Locking provides needed memory barriers. */
  19524. @@ -2230,7 +2263,7 @@
  19525. WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
  19526. WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);
  19527. raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
  19528. - rcu_gp_kthread_wake(rsp);
  19529. + swake_up(&rsp->gp_wq); /* Memory barrier implied by swake_up() path. */
  19530. }
  19531. /*
  19532. @@ -2891,7 +2924,7 @@
  19533. }
  19534. WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);
  19535. raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
  19536. - rcu_gp_kthread_wake(rsp);
  19537. + swake_up(&rsp->gp_wq); /* Memory barrier implied by swake_up() path. */
  19538. }
  19539. /*
  19540. @@ -2934,18 +2967,17 @@
  19541. /*
  19542. * Do RCU core processing for the current CPU.
  19543. */
  19544. -static void rcu_process_callbacks(struct softirq_action *unused)
  19545. +static void rcu_process_callbacks(void)
  19546. {
  19547. struct rcu_state *rsp;
  19548. if (cpu_is_offline(smp_processor_id()))
  19549. return;
  19550. - trace_rcu_utilization(TPS("Start RCU core"));
  19551. for_each_rcu_flavor(rsp)
  19552. __rcu_process_callbacks(rsp);
  19553. - trace_rcu_utilization(TPS("End RCU core"));
  19554. }
  19555. +static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
  19556. /*
  19557. * Schedule RCU callback invocation. If the specified type of RCU
  19558. * does not support RCU priority boosting, just do a direct call,
  19559. @@ -2957,18 +2989,105 @@
  19560. {
  19561. if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
  19562. return;
  19563. - if (likely(!rsp->boost)) {
  19564. - rcu_do_batch(rsp, rdp);
  19565. + rcu_do_batch(rsp, rdp);
  19566. +}
  19567. +
  19568. +static void rcu_wake_cond(struct task_struct *t, int status)
  19569. +{
  19570. + /*
  19571. + * If the thread is yielding, only wake it when this
  19572. + * is invoked from idle
  19573. + */
  19574. + if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current)))
  19575. + wake_up_process(t);
  19576. +}
  19577. +
  19578. +/*
  19579. + * Wake up this CPU's rcuc kthread to do RCU core processing.
  19580. + */
  19581. +static void invoke_rcu_core(void)
  19582. +{
  19583. + unsigned long flags;
  19584. + struct task_struct *t;
  19585. +
  19586. + if (!cpu_online(smp_processor_id()))
  19587. return;
  19588. + local_irq_save(flags);
  19589. + __this_cpu_write(rcu_cpu_has_work, 1);
  19590. + t = __this_cpu_read(rcu_cpu_kthread_task);
  19591. + if (t != NULL && current != t)
  19592. + rcu_wake_cond(t, __this_cpu_read(rcu_cpu_kthread_status));
  19593. + local_irq_restore(flags);
  19594. +}
  19595. +
  19596. +static void rcu_cpu_kthread_park(unsigned int cpu)
  19597. +{
  19598. + per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
  19599. +}
  19600. +
  19601. +static int rcu_cpu_kthread_should_run(unsigned int cpu)
  19602. +{
  19603. + return __this_cpu_read(rcu_cpu_has_work);
  19604. +}
  19605. +
  19606. +/*
  19607. + * Per-CPU kernel thread that invokes RCU callbacks. This replaces the
  19608. + * RCU softirq used in flavors and configurations of RCU that do not
  19609. + * support RCU priority boosting.
  19610. + */
  19611. +static void rcu_cpu_kthread(unsigned int cpu)
  19612. +{
  19613. + unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status);
  19614. + char work, *workp = this_cpu_ptr(&rcu_cpu_has_work);
  19615. + int spincnt;
  19616. +
  19617. + for (spincnt = 0; spincnt < 10; spincnt++) {
  19618. + trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
  19619. + local_bh_disable();
  19620. + *statusp = RCU_KTHREAD_RUNNING;
  19621. + this_cpu_inc(rcu_cpu_kthread_loops);
  19622. + local_irq_disable();
  19623. + work = *workp;
  19624. + *workp = 0;
  19625. + local_irq_enable();
  19626. + if (work)
  19627. + rcu_process_callbacks();
  19628. + local_bh_enable();
  19629. + if (*workp == 0) {
  19630. + trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
  19631. + *statusp = RCU_KTHREAD_WAITING;
  19632. + return;
  19633. + }
  19634. }
  19635. - invoke_rcu_callbacks_kthread();
  19636. + *statusp = RCU_KTHREAD_YIELDING;
  19637. + trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
  19638. + schedule_timeout_interruptible(2);
  19639. + trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
  19640. + *statusp = RCU_KTHREAD_WAITING;
  19641. }
  19642. -static void invoke_rcu_core(void)
  19643. +static struct smp_hotplug_thread rcu_cpu_thread_spec = {
  19644. + .store = &rcu_cpu_kthread_task,
  19645. + .thread_should_run = rcu_cpu_kthread_should_run,
  19646. + .thread_fn = rcu_cpu_kthread,
  19647. + .thread_comm = "rcuc/%u",
  19648. + .setup = rcu_cpu_kthread_setup,
  19649. + .park = rcu_cpu_kthread_park,
  19650. +};
  19651. +
  19652. +/*
  19653. + * Spawn per-CPU RCU core processing kthreads.
  19654. + */
  19655. +static int __init rcu_spawn_core_kthreads(void)
  19656. {
  19657. - if (cpu_online(smp_processor_id()))
  19658. - raise_softirq(RCU_SOFTIRQ);
  19659. + int cpu;
  19660. +
  19661. + for_each_possible_cpu(cpu)
  19662. + per_cpu(rcu_cpu_has_work, cpu) = 0;
  19663. + BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
  19664. + return 0;
  19665. }
  19666. +early_initcall(rcu_spawn_core_kthreads);
  19667. /*
  19668. * Handle any core-RCU processing required by a call_rcu() invocation.
  19669. @@ -3114,6 +3233,7 @@
  19670. }
  19671. EXPORT_SYMBOL_GPL(call_rcu_sched);
  19672. +#ifndef CONFIG_PREEMPT_RT_FULL
  19673. /*
  19674. * Queue an RCU callback for invocation after a quicker grace period.
  19675. */
  19676. @@ -3122,6 +3242,7 @@
  19677. __call_rcu(head, func, &rcu_bh_state, -1, 0);
  19678. }
  19679. EXPORT_SYMBOL_GPL(call_rcu_bh);
  19680. +#endif
  19681. /*
  19682. * Queue an RCU callback for lazy invocation after a grace period.
  19683. @@ -3213,6 +3334,7 @@
  19684. }
  19685. EXPORT_SYMBOL_GPL(synchronize_sched);
  19686. +#ifndef CONFIG_PREEMPT_RT_FULL
  19687. /**
  19688. * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
  19689. *
  19690. @@ -3239,6 +3361,7 @@
  19691. wait_rcu_gp(call_rcu_bh);
  19692. }
  19693. EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
  19694. +#endif
  19695. /**
  19696. * get_state_synchronize_rcu - Snapshot current RCU state
  19697. @@ -3524,7 +3647,7 @@
  19698. raw_spin_unlock_irqrestore(&rnp->lock, flags);
  19699. if (wake) {
  19700. smp_mb(); /* EGP done before wake_up(). */
  19701. - wake_up(&rsp->expedited_wq);
  19702. + swake_up(&rsp->expedited_wq);
  19703. }
  19704. break;
  19705. }
  19706. @@ -3781,7 +3904,7 @@
  19707. jiffies_start = jiffies;
  19708. for (;;) {
  19709. - ret = wait_event_interruptible_timeout(
  19710. + ret = swait_event_timeout(
  19711. rsp->expedited_wq,
  19712. sync_rcu_preempt_exp_done(rnp_root),
  19713. jiffies_stall);
  19714. @@ -3789,7 +3912,7 @@
  19715. return;
  19716. if (ret < 0) {
  19717. /* Hit a signal, disable CPU stall warnings. */
  19718. - wait_event(rsp->expedited_wq,
  19719. + swait_event(rsp->expedited_wq,
  19720. sync_rcu_preempt_exp_done(rnp_root));
  19721. return;
  19722. }
  19723. @@ -4101,6 +4224,7 @@
  19724. mutex_unlock(&rsp->barrier_mutex);
  19725. }
  19726. +#ifndef CONFIG_PREEMPT_RT_FULL
  19727. /**
  19728. * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
  19729. */
  19730. @@ -4109,6 +4233,7 @@
  19731. _rcu_barrier(&rcu_bh_state);
  19732. }
  19733. EXPORT_SYMBOL_GPL(rcu_barrier_bh);
  19734. +#endif
  19735. /**
  19736. * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
  19737. @@ -4455,8 +4580,8 @@
  19738. }
  19739. }
  19740. - init_waitqueue_head(&rsp->gp_wq);
  19741. - init_waitqueue_head(&rsp->expedited_wq);
  19742. + init_swait_queue_head(&rsp->gp_wq);
  19743. + init_swait_queue_head(&rsp->expedited_wq);
  19744. rnp = rsp->level[rcu_num_lvls - 1];
  19745. for_each_possible_cpu(i) {
  19746. while (i > rnp->grphi)
  19747. @@ -4576,12 +4701,13 @@
  19748. rcu_bootup_announce();
  19749. rcu_init_geometry();
  19750. +#ifndef CONFIG_PREEMPT_RT_FULL
  19751. rcu_init_one(&rcu_bh_state, &rcu_bh_data);
  19752. +#endif
  19753. rcu_init_one(&rcu_sched_state, &rcu_sched_data);
  19754. if (dump_tree)
  19755. rcu_dump_rcu_node_tree(&rcu_sched_state);
  19756. __rcu_init_preempt();
  19757. - open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
  19758. /*
  19759. * We don't need protection against CPU-hotplug here because
  19760. diff -Nur linux-4.4.62.orig/kernel/rcu/tree.h linux-4.4.62/kernel/rcu/tree.h
  19761. --- linux-4.4.62.orig/kernel/rcu/tree.h 2017-04-18 07:15:37.000000000 +0200
  19762. +++ linux-4.4.62/kernel/rcu/tree.h 2017-04-18 17:38:08.214650020 +0200
  19763. @@ -27,6 +27,7 @@
  19764. #include <linux/threads.h>
  19765. #include <linux/cpumask.h>
  19766. #include <linux/seqlock.h>
  19767. +#include <linux/swait.h>
  19768. #include <linux/stop_machine.h>
  19769. /*
  19770. @@ -241,7 +242,7 @@
  19771. /* Refused to boost: not sure why, though. */
  19772. /* This can happen due to race conditions. */
  19773. #ifdef CONFIG_RCU_NOCB_CPU
  19774. - wait_queue_head_t nocb_gp_wq[2];
  19775. + struct swait_queue_head nocb_gp_wq[2];
  19776. /* Place for rcu_nocb_kthread() to wait GP. */
  19777. #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
  19778. int need_future_gp[2];
  19779. @@ -393,7 +394,7 @@
  19780. atomic_long_t nocb_q_count_lazy; /* invocation (all stages). */
  19781. struct rcu_head *nocb_follower_head; /* CBs ready to invoke. */
  19782. struct rcu_head **nocb_follower_tail;
  19783. - wait_queue_head_t nocb_wq; /* For nocb kthreads to sleep on. */
  19784. + struct swait_queue_head nocb_wq; /* For nocb kthreads to sleep on. */
  19785. struct task_struct *nocb_kthread;
  19786. int nocb_defer_wakeup; /* Defer wakeup of nocb_kthread. */
  19787. @@ -472,7 +473,7 @@
  19788. unsigned long gpnum; /* Current gp number. */
  19789. unsigned long completed; /* # of last completed gp. */
  19790. struct task_struct *gp_kthread; /* Task for grace periods. */
  19791. - wait_queue_head_t gp_wq; /* Where GP task waits. */
  19792. + struct swait_queue_head gp_wq; /* Where GP task waits. */
  19793. short gp_flags; /* Commands for GP task. */
  19794. short gp_state; /* GP kthread sleep state. */
  19795. @@ -504,7 +505,7 @@
  19796. atomic_long_t expedited_workdone3; /* # done by others #3. */
  19797. atomic_long_t expedited_normal; /* # fallbacks to normal. */
  19798. atomic_t expedited_need_qs; /* # CPUs left to check in. */
  19799. - wait_queue_head_t expedited_wq; /* Wait for check-ins. */
  19800. + struct swait_queue_head expedited_wq; /* Wait for check-ins. */
  19801. int ncpus_snap; /* # CPUs seen last time. */
  19802. unsigned long jiffies_force_qs; /* Time at which to invoke */
  19803. @@ -556,18 +557,18 @@
  19804. */
  19805. extern struct rcu_state rcu_sched_state;
  19806. +#ifndef CONFIG_PREEMPT_RT_FULL
  19807. extern struct rcu_state rcu_bh_state;
  19808. +#endif
  19809. #ifdef CONFIG_PREEMPT_RCU
  19810. extern struct rcu_state rcu_preempt_state;
  19811. #endif /* #ifdef CONFIG_PREEMPT_RCU */
  19812. -#ifdef CONFIG_RCU_BOOST
  19813. DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
  19814. DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu);
  19815. DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
  19816. DECLARE_PER_CPU(char, rcu_cpu_has_work);
  19817. -#endif /* #ifdef CONFIG_RCU_BOOST */
  19818. #ifndef RCU_TREE_NONCORE
  19819. @@ -587,10 +588,9 @@
  19820. static void __init __rcu_init_preempt(void);
  19821. static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
  19822. static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
  19823. -static void invoke_rcu_callbacks_kthread(void);
  19824. static bool rcu_is_callbacks_kthread(void);
  19825. +static void rcu_cpu_kthread_setup(unsigned int cpu);
  19826. #ifdef CONFIG_RCU_BOOST
  19827. -static void rcu_preempt_do_callbacks(void);
  19828. static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
  19829. struct rcu_node *rnp);
  19830. #endif /* #ifdef CONFIG_RCU_BOOST */
  19831. @@ -607,7 +607,8 @@
  19832. static void increment_cpu_stall_ticks(void);
  19833. static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu);
  19834. static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq);
  19835. -static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp);
  19836. +static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp);
  19837. +static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq);
  19838. static void rcu_init_one_nocb(struct rcu_node *rnp);
  19839. static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
  19840. bool lazy, unsigned long flags);
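tree.h switches every RCU wait point (grace-period kthread, expedited waiters, no-CBs kthreads) from wait_queue_head_t to struct swait_queue_head. Simple wait queues are built on a raw spinlock with small, bounded wakeup work, which keeps these wakeup paths usable from non-preemptible context on PREEMPT_RT. A minimal usage sketch of the swait API as relied on by these hunks; the my_* names are illustrative:

#include <linux/swait.h>
#include <linux/compiler.h>

static DECLARE_SWAIT_QUEUE_HEAD(my_wq);
static int my_cond;

static int my_waiter(void)
{
	/* Sleeps until my_cond becomes non-zero; interruptible by signals. */
	return swait_event_interruptible(my_wq, READ_ONCE(my_cond));
}

static void my_waker(void)
{
	WRITE_ONCE(my_cond, 1);
	swake_up(&my_wq);		/* swake_up_all() wakes every waiter */
}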
  19841. diff -Nur linux-4.4.62.orig/kernel/rcu/tree_plugin.h linux-4.4.62/kernel/rcu/tree_plugin.h
  19842. --- linux-4.4.62.orig/kernel/rcu/tree_plugin.h 2017-04-18 07:15:37.000000000 +0200
  19843. +++ linux-4.4.62/kernel/rcu/tree_plugin.h 2017-04-18 17:38:08.214650020 +0200
  19844. @@ -24,25 +24,10 @@
  19845. * Paul E. McKenney <paulmck@linux.vnet.ibm.com>
  19846. */
  19847. -#include <linux/delay.h>
  19848. -#include <linux/gfp.h>
  19849. -#include <linux/oom.h>
  19850. -#include <linux/smpboot.h>
  19851. -#include "../time/tick-internal.h"
  19852. -
  19853. #ifdef CONFIG_RCU_BOOST
  19854. #include "../locking/rtmutex_common.h"
  19855. -/*
  19856. - * Control variables for per-CPU and per-rcu_node kthreads. These
  19857. - * handle all flavors of RCU.
  19858. - */
  19859. -static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
  19860. -DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
  19861. -DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
  19862. -DEFINE_PER_CPU(char, rcu_cpu_has_work);
  19863. -
  19864. #else /* #ifdef CONFIG_RCU_BOOST */
  19865. /*
  19866. @@ -55,6 +40,14 @@
  19867. #endif /* #else #ifdef CONFIG_RCU_BOOST */
  19868. +/*
  19869. + * Control variables for per-CPU and per-rcu_node kthreads. These
  19870. + * handle all flavors of RCU.
  19871. + */
  19872. +DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
  19873. +DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
  19874. +DEFINE_PER_CPU(char, rcu_cpu_has_work);
  19875. +
  19876. #ifdef CONFIG_RCU_NOCB_CPU
  19877. static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
  19878. static bool have_rcu_nocb_mask; /* Was rcu_nocb_mask allocated? */
  19879. @@ -432,7 +425,7 @@
  19880. }
  19881. /* Hardware IRQ handlers cannot block, complain if they get here. */
  19882. - if (in_irq() || in_serving_softirq()) {
  19883. + if (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_OFFSET)) {
  19884. lockdep_rcu_suspicious(__FILE__, __LINE__,
  19885. "rcu_read_unlock() from irq or softirq with blocking in critical section!!!\n");
  19886. pr_alert("->rcu_read_unlock_special: %#x (b: %d, enq: %d nq: %d)\n",
  19887. @@ -645,15 +638,6 @@
  19888. t->rcu_read_unlock_special.b.need_qs = true;
  19889. }
  19890. -#ifdef CONFIG_RCU_BOOST
  19891. -
  19892. -static void rcu_preempt_do_callbacks(void)
  19893. -{
  19894. - rcu_do_batch(rcu_state_p, this_cpu_ptr(rcu_data_p));
  19895. -}
  19896. -
  19897. -#endif /* #ifdef CONFIG_RCU_BOOST */
  19898. -
  19899. /*
  19900. * Queue a preemptible-RCU callback for invocation after a grace period.
  19901. */
  19902. @@ -930,6 +914,19 @@
  19903. #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
  19904. +/*
  19905. + * If boosting, set rcuc kthreads to realtime priority.
  19906. + */
  19907. +static void rcu_cpu_kthread_setup(unsigned int cpu)
  19908. +{
  19909. +#ifdef CONFIG_RCU_BOOST
  19910. + struct sched_param sp;
  19911. +
  19912. + sp.sched_priority = kthread_prio;
  19913. + sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
  19914. +#endif /* #ifdef CONFIG_RCU_BOOST */
  19915. +}
  19916. +
  19917. #ifdef CONFIG_RCU_BOOST
  19918. #include "../locking/rtmutex_common.h"
  19919. @@ -961,16 +958,6 @@
  19920. #endif /* #else #ifdef CONFIG_RCU_TRACE */
  19921. -static void rcu_wake_cond(struct task_struct *t, int status)
  19922. -{
  19923. - /*
  19924. - * If the thread is yielding, only wake it when this
  19925. - * is invoked from idle
  19926. - */
  19927. - if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))
  19928. - wake_up_process(t);
  19929. -}
  19930. -
  19931. /*
  19932. * Carry out RCU priority boosting on the task indicated by ->exp_tasks
  19933. * or ->boost_tasks, advancing the pointer to the next task in the
  19934. @@ -1115,23 +1102,6 @@
  19935. }
  19936. /*
  19937. - * Wake up the per-CPU kthread to invoke RCU callbacks.
  19938. - */
  19939. -static void invoke_rcu_callbacks_kthread(void)
  19940. -{
  19941. - unsigned long flags;
  19942. -
  19943. - local_irq_save(flags);
  19944. - __this_cpu_write(rcu_cpu_has_work, 1);
  19945. - if (__this_cpu_read(rcu_cpu_kthread_task) != NULL &&
  19946. - current != __this_cpu_read(rcu_cpu_kthread_task)) {
  19947. - rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task),
  19948. - __this_cpu_read(rcu_cpu_kthread_status));
  19949. - }
  19950. - local_irq_restore(flags);
  19951. -}
  19952. -
  19953. -/*
  19954. * Is the current CPU running the RCU-callbacks kthread?
  19955. * Caller must have preemption disabled.
  19956. */
  19957. @@ -1186,67 +1156,6 @@
  19958. return 0;
  19959. }
  19960. -static void rcu_kthread_do_work(void)
  19961. -{
  19962. - rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data));
  19963. - rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data));
  19964. - rcu_preempt_do_callbacks();
  19965. -}
  19966. -
  19967. -static void rcu_cpu_kthread_setup(unsigned int cpu)
  19968. -{
  19969. - struct sched_param sp;
  19970. -
  19971. - sp.sched_priority = kthread_prio;
  19972. - sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
  19973. -}
  19974. -
  19975. -static void rcu_cpu_kthread_park(unsigned int cpu)
  19976. -{
  19977. - per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
  19978. -}
  19979. -
  19980. -static int rcu_cpu_kthread_should_run(unsigned int cpu)
  19981. -{
  19982. - return __this_cpu_read(rcu_cpu_has_work);
  19983. -}
  19984. -
  19985. -/*
  19986. - * Per-CPU kernel thread that invokes RCU callbacks. This replaces the
  19987. - * RCU softirq used in flavors and configurations of RCU that do not
  19988. - * support RCU priority boosting.
  19989. - */
  19990. -static void rcu_cpu_kthread(unsigned int cpu)
  19991. -{
  19992. - unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status);
  19993. - char work, *workp = this_cpu_ptr(&rcu_cpu_has_work);
  19994. - int spincnt;
  19995. -
  19996. - for (spincnt = 0; spincnt < 10; spincnt++) {
  19997. - trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
  19998. - local_bh_disable();
  19999. - *statusp = RCU_KTHREAD_RUNNING;
  20000. - this_cpu_inc(rcu_cpu_kthread_loops);
  20001. - local_irq_disable();
  20002. - work = *workp;
  20003. - *workp = 0;
  20004. - local_irq_enable();
  20005. - if (work)
  20006. - rcu_kthread_do_work();
  20007. - local_bh_enable();
  20008. - if (*workp == 0) {
  20009. - trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
  20010. - *statusp = RCU_KTHREAD_WAITING;
  20011. - return;
  20012. - }
  20013. - }
  20014. - *statusp = RCU_KTHREAD_YIELDING;
  20015. - trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
  20016. - schedule_timeout_interruptible(2);
  20017. - trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
  20018. - *statusp = RCU_KTHREAD_WAITING;
  20019. -}
  20020. -
  20021. /*
  20022. * Set the per-rcu_node kthread's affinity to cover all CPUs that are
  20023. * served by the rcu_node in question. The CPU hotplug lock is still
  20024. @@ -1276,26 +1185,12 @@
  20025. free_cpumask_var(cm);
  20026. }
  20027. -static struct smp_hotplug_thread rcu_cpu_thread_spec = {
  20028. - .store = &rcu_cpu_kthread_task,
  20029. - .thread_should_run = rcu_cpu_kthread_should_run,
  20030. - .thread_fn = rcu_cpu_kthread,
  20031. - .thread_comm = "rcuc/%u",
  20032. - .setup = rcu_cpu_kthread_setup,
  20033. - .park = rcu_cpu_kthread_park,
  20034. -};
  20035. -
  20036. /*
  20037. * Spawn boost kthreads -- called as soon as the scheduler is running.
  20038. */
  20039. static void __init rcu_spawn_boost_kthreads(void)
  20040. {
  20041. struct rcu_node *rnp;
  20042. - int cpu;
  20043. -
  20044. - for_each_possible_cpu(cpu)
  20045. - per_cpu(rcu_cpu_has_work, cpu) = 0;
  20046. - BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
  20047. rcu_for_each_leaf_node(rcu_state_p, rnp)
  20048. (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp);
  20049. }
  20050. @@ -1318,11 +1213,6 @@
  20051. raw_spin_unlock_irqrestore(&rnp->lock, flags);
  20052. }
  20053. -static void invoke_rcu_callbacks_kthread(void)
  20054. -{
  20055. - WARN_ON_ONCE(1);
  20056. -}
  20057. -
  20058. static bool rcu_is_callbacks_kthread(void)
  20059. {
  20060. return false;
  20061. @@ -1346,7 +1236,7 @@
  20062. #endif /* #else #ifdef CONFIG_RCU_BOOST */
  20063. -#if !defined(CONFIG_RCU_FAST_NO_HZ)
  20064. +#if !defined(CONFIG_RCU_FAST_NO_HZ) || defined(CONFIG_PREEMPT_RT_FULL)
  20065. /*
  20066. * Check to see if any future RCU-related work will need to be done
  20067. @@ -1363,7 +1253,9 @@
  20068. return IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL)
  20069. ? 0 : rcu_cpu_has_callbacks(NULL);
  20070. }
  20071. +#endif /* !defined(CONFIG_RCU_FAST_NO_HZ) || defined(CONFIG_PREEMPT_RT_FULL) */
  20072. +#if !defined(CONFIG_RCU_FAST_NO_HZ)
  20073. /*
  20074. * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up
  20075. * after it.
  20076. @@ -1459,6 +1351,8 @@
  20077. return cbs_ready;
  20078. }
  20079. +#ifndef CONFIG_PREEMPT_RT_FULL
  20080. +
  20081. /*
  20082. * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready
  20083. * to invoke. If the CPU has callbacks, try to advance them. Tell the
  20084. @@ -1504,6 +1398,7 @@
  20085. *nextevt = basemono + dj * TICK_NSEC;
  20086. return 0;
  20087. }
  20088. +#endif /* #ifndef CONFIG_PREEMPT_RT_FULL */
  20089. /*
  20090. * Prepare a CPU for idle from an RCU perspective. The first major task
  20091. @@ -1822,9 +1717,9 @@
  20092. * Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended
  20093. * grace period.
  20094. */
  20095. -static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
  20096. +static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
  20097. {
  20098. - wake_up_all(&rnp->nocb_gp_wq[rnp->completed & 0x1]);
  20099. + swake_up_all(sq);
  20100. }
  20101. /*
  20102. @@ -1840,10 +1735,15 @@
  20103. rnp->need_future_gp[(rnp->completed + 1) & 0x1] += nrq;
  20104. }
  20105. +static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
  20106. +{
  20107. + return &rnp->nocb_gp_wq[rnp->completed & 0x1];
  20108. +}
  20109. +
  20110. static void rcu_init_one_nocb(struct rcu_node *rnp)
  20111. {
  20112. - init_waitqueue_head(&rnp->nocb_gp_wq[0]);
  20113. - init_waitqueue_head(&rnp->nocb_gp_wq[1]);
  20114. + init_swait_queue_head(&rnp->nocb_gp_wq[0]);
  20115. + init_swait_queue_head(&rnp->nocb_gp_wq[1]);
  20116. }
  20117. #ifndef CONFIG_RCU_NOCB_CPU_ALL
  20118. @@ -1868,7 +1768,7 @@
  20119. if (READ_ONCE(rdp_leader->nocb_leader_sleep) || force) {
  20120. /* Prior smp_mb__after_atomic() orders against prior enqueue. */
  20121. WRITE_ONCE(rdp_leader->nocb_leader_sleep, false);
  20122. - wake_up(&rdp_leader->nocb_wq);
  20123. + swake_up(&rdp_leader->nocb_wq);
  20124. }
  20125. }
  20126. @@ -2081,7 +1981,7 @@
  20127. */
  20128. trace_rcu_future_gp(rnp, rdp, c, TPS("StartWait"));
  20129. for (;;) {
  20130. - wait_event_interruptible(
  20131. + swait_event_interruptible(
  20132. rnp->nocb_gp_wq[c & 0x1],
  20133. (d = ULONG_CMP_GE(READ_ONCE(rnp->completed), c)));
  20134. if (likely(d))
  20135. @@ -2109,7 +2009,7 @@
  20136. /* Wait for callbacks to appear. */
  20137. if (!rcu_nocb_poll) {
  20138. trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, "Sleep");
  20139. - wait_event_interruptible(my_rdp->nocb_wq,
  20140. + swait_event_interruptible(my_rdp->nocb_wq,
  20141. !READ_ONCE(my_rdp->nocb_leader_sleep));
  20142. /* Memory barrier handled by smp_mb() calls below and repoll. */
  20143. } else if (firsttime) {
  20144. @@ -2184,7 +2084,7 @@
  20145. * List was empty, wake up the follower.
  20146. * Memory barriers supplied by atomic_long_add().
  20147. */
  20148. - wake_up(&rdp->nocb_wq);
  20149. + swake_up(&rdp->nocb_wq);
  20150. }
  20151. }
  20152. @@ -2205,7 +2105,7 @@
  20153. if (!rcu_nocb_poll) {
  20154. trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
  20155. "FollowerSleep");
  20156. - wait_event_interruptible(rdp->nocb_wq,
  20157. + swait_event_interruptible(rdp->nocb_wq,
  20158. READ_ONCE(rdp->nocb_follower_head));
  20159. } else if (firsttime) {
  20160. /* Don't drown trace log with "Poll"! */
  20161. @@ -2365,7 +2265,7 @@
  20162. static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
  20163. {
  20164. rdp->nocb_tail = &rdp->nocb_head;
  20165. - init_waitqueue_head(&rdp->nocb_wq);
  20166. + init_swait_queue_head(&rdp->nocb_wq);
  20167. rdp->nocb_follower_tail = &rdp->nocb_follower_head;
  20168. }
  20169. @@ -2515,7 +2415,7 @@
  20170. return false;
  20171. }
  20172. -static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
  20173. +static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
  20174. {
  20175. }
  20176. @@ -2523,6 +2423,11 @@
  20177. {
  20178. }
  20179. +static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
  20180. +{
  20181. + return NULL;
  20182. +}
  20183. +
  20184. static void rcu_init_one_nocb(struct rcu_node *rnp)
  20185. {
  20186. }
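Taken together, the tree.c and tree_plugin.h hunks retire the RCU_SOFTIRQ handler: callback processing now always runs in the per-CPU rcuc/%u kthreads, registered through the smpboot infrastructure at early_initcall time, whereas previously those kthreads existed only with CONFIG_RCU_BOOST. A stripped-down sketch of the smpboot per-CPU kthread pattern used above; all my_* names are placeholders:

#include <linux/smpboot.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/kernel.h>

static DEFINE_PER_CPU(struct task_struct *, my_task);
static DEFINE_PER_CPU(char, my_has_work);

static int my_should_run(unsigned int cpu)
{
	return __this_cpu_read(my_has_work);
}

static void my_thread_fn(unsigned int cpu)
{
	__this_cpu_write(my_has_work, 0);
	/* do this CPU's share of the work here */
}

static struct smp_hotplug_thread my_threads = {
	.store			= &my_task,
	.thread_should_run	= my_should_run,
	.thread_fn		= my_thread_fn,
	.thread_comm		= "my/%u",
};

static int __init my_spawn_threads(void)
{
	BUG_ON(smpboot_register_percpu_thread(&my_threads));
	return 0;
}
early_initcall(my_spawn_threads);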
  20187. diff -Nur linux-4.4.62.orig/kernel/rcu/update.c linux-4.4.62/kernel/rcu/update.c
  20188. --- linux-4.4.62.orig/kernel/rcu/update.c 2017-04-18 07:15:37.000000000 +0200
  20189. +++ linux-4.4.62/kernel/rcu/update.c 2017-04-18 17:38:08.214650020 +0200
  20190. @@ -276,6 +276,7 @@
  20191. }
  20192. EXPORT_SYMBOL_GPL(rcu_read_lock_held);
  20193. +#ifndef CONFIG_PREEMPT_RT_FULL
  20194. /**
  20195. * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section?
  20196. *
  20197. @@ -302,6 +303,7 @@
  20198. return in_softirq() || irqs_disabled();
  20199. }
  20200. EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
  20201. +#endif
  20202. #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
  20203. diff -Nur linux-4.4.62.orig/kernel/relay.c linux-4.4.62/kernel/relay.c
  20204. --- linux-4.4.62.orig/kernel/relay.c 2017-04-18 07:15:37.000000000 +0200
  20205. +++ linux-4.4.62/kernel/relay.c 2017-04-18 17:38:08.214650020 +0200
  20206. @@ -336,6 +336,10 @@
  20207. {
  20208. struct rchan_buf *buf = (struct rchan_buf *)data;
  20209. wake_up_interruptible(&buf->read_wait);
  20210. + /*
  20211. + * Stupid polling for now:
  20212. + */
  20213. + mod_timer(&buf->timer, jiffies + 1);
  20214. }
  20215. /**
  20216. @@ -353,6 +357,7 @@
  20217. init_waitqueue_head(&buf->read_wait);
  20218. kref_init(&buf->kref);
  20219. setup_timer(&buf->timer, wakeup_readers, (unsigned long)buf);
  20220. + mod_timer(&buf->timer, jiffies + 1);
  20221. } else
  20222. del_timer_sync(&buf->timer);
  20223. @@ -736,15 +741,6 @@
  20224. else
  20225. buf->early_bytes += buf->chan->subbuf_size -
  20226. buf->padding[old_subbuf];
  20227. - smp_mb();
  20228. - if (waitqueue_active(&buf->read_wait))
  20229. - /*
  20230. - * Calling wake_up_interruptible() from here
  20231. - * will deadlock if we happen to be logging
  20232. - * from the scheduler (trying to re-grab
  20233. - * rq->lock), so defer it.
  20234. - */
  20235. - mod_timer(&buf->timer, jiffies + 1);
  20236. }
  20237. old = buf->data;
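The relay.c change drops the waitqueue_active() check and deferred wakeup from the buffer-switch path (which can run from the scheduler with rq->lock held, as the removed comment notes) and instead keeps the wakeup timer permanently re-armed, polling readers once per jiffy ("stupid polling", per the comment added above). The self-rearming timer pattern in isolation, with illustrative names and the 4.4-era timer API:

#include <linux/timer.h>
#include <linux/jiffies.h>
#include <linux/wait.h>

static struct timer_list my_timer;
static DECLARE_WAIT_QUEUE_HEAD(my_readers);

static void my_poll_readers(unsigned long data)
{
	wake_up_interruptible(&my_readers);
	mod_timer(&my_timer, jiffies + 1);	/* re-arm: poll again next tick */
}

static void my_start_polling(void)
{
	setup_timer(&my_timer, my_poll_readers, 0);
	mod_timer(&my_timer, jiffies + 1);
}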
  20238. diff -Nur linux-4.4.62.orig/kernel/sched/completion.c linux-4.4.62/kernel/sched/completion.c
  20239. --- linux-4.4.62.orig/kernel/sched/completion.c 2017-04-18 07:15:37.000000000 +0200
  20240. +++ linux-4.4.62/kernel/sched/completion.c 2017-04-18 17:38:08.214650020 +0200
  20241. @@ -30,10 +30,10 @@
  20242. {
  20243. unsigned long flags;
  20244. - spin_lock_irqsave(&x->wait.lock, flags);
  20245. + raw_spin_lock_irqsave(&x->wait.lock, flags);
  20246. x->done++;
  20247. - __wake_up_locked(&x->wait, TASK_NORMAL, 1);
  20248. - spin_unlock_irqrestore(&x->wait.lock, flags);
  20249. + swake_up_locked(&x->wait);
  20250. + raw_spin_unlock_irqrestore(&x->wait.lock, flags);
  20251. }
  20252. EXPORT_SYMBOL(complete);
  20253. @@ -50,10 +50,10 @@
  20254. {
  20255. unsigned long flags;
  20256. - spin_lock_irqsave(&x->wait.lock, flags);
  20257. + raw_spin_lock_irqsave(&x->wait.lock, flags);
  20258. x->done += UINT_MAX/2;
  20259. - __wake_up_locked(&x->wait, TASK_NORMAL, 0);
  20260. - spin_unlock_irqrestore(&x->wait.lock, flags);
  20261. + swake_up_all_locked(&x->wait);
  20262. + raw_spin_unlock_irqrestore(&x->wait.lock, flags);
  20263. }
  20264. EXPORT_SYMBOL(complete_all);
  20265. @@ -62,20 +62,20 @@
  20266. long (*action)(long), long timeout, int state)
  20267. {
  20268. if (!x->done) {
  20269. - DECLARE_WAITQUEUE(wait, current);
  20270. + DECLARE_SWAITQUEUE(wait);
  20271. - __add_wait_queue_tail_exclusive(&x->wait, &wait);
  20272. + __prepare_to_swait(&x->wait, &wait);
  20273. do {
  20274. if (signal_pending_state(state, current)) {
  20275. timeout = -ERESTARTSYS;
  20276. break;
  20277. }
  20278. __set_current_state(state);
  20279. - spin_unlock_irq(&x->wait.lock);
  20280. + raw_spin_unlock_irq(&x->wait.lock);
  20281. timeout = action(timeout);
  20282. - spin_lock_irq(&x->wait.lock);
  20283. + raw_spin_lock_irq(&x->wait.lock);
  20284. } while (!x->done && timeout);
  20285. - __remove_wait_queue(&x->wait, &wait);
  20286. + __finish_swait(&x->wait, &wait);
  20287. if (!x->done)
  20288. return timeout;
  20289. }
  20290. @@ -89,9 +89,9 @@
  20291. {
  20292. might_sleep();
  20293. - spin_lock_irq(&x->wait.lock);
  20294. + raw_spin_lock_irq(&x->wait.lock);
  20295. timeout = do_wait_for_common(x, action, timeout, state);
  20296. - spin_unlock_irq(&x->wait.lock);
  20297. + raw_spin_unlock_irq(&x->wait.lock);
  20298. return timeout;
  20299. }
  20300. @@ -277,12 +277,12 @@
  20301. if (!READ_ONCE(x->done))
  20302. return 0;
  20303. - spin_lock_irqsave(&x->wait.lock, flags);
  20304. + raw_spin_lock_irqsave(&x->wait.lock, flags);
  20305. if (!x->done)
  20306. ret = 0;
  20307. else
  20308. x->done--;
  20309. - spin_unlock_irqrestore(&x->wait.lock, flags);
  20310. + raw_spin_unlock_irqrestore(&x->wait.lock, flags);
  20311. return ret;
  20312. }
  20313. EXPORT_SYMBOL(try_wait_for_completion);
  20314. @@ -311,7 +311,7 @@
  20315. * after it's acquired the lock.
  20316. */
  20317. smp_rmb();
  20318. - spin_unlock_wait(&x->wait.lock);
  20319. + raw_spin_unlock_wait(&x->wait.lock);
  20320. return true;
  20321. }
  20322. EXPORT_SYMBOL(completion_done);
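completion.c is converted from a regular waitqueue guarded by a sleeping spinlock to a simple waitqueue guarded by a raw spinlock, so complete() remains callable from hard-IRQ and other atomic contexts on PREEMPT_RT. The caller-visible API is unchanged; a minimal usage reminder, with my_done as an illustrative name:

#include <linux/completion.h>

static DECLARE_COMPLETION(my_done);

static int my_wait_side(void)
{
	/* Blocks until my_complete_side() runs; interruptible variant shown. */
	return wait_for_completion_interruptible(&my_done);
}

static void my_complete_side(void)
{
	complete(&my_done);	/* wakes exactly one waiter */
}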
  20323. diff -Nur linux-4.4.62.orig/kernel/sched/core.c linux-4.4.62/kernel/sched/core.c
  20324. --- linux-4.4.62.orig/kernel/sched/core.c 2017-04-18 07:15:37.000000000 +0200
  20325. +++ linux-4.4.62/kernel/sched/core.c 2017-04-18 17:38:08.218650175 +0200
  20326. @@ -260,7 +260,11 @@
  20327. * Number of tasks to iterate in a single balance run.
  20328. * Limited because this is done with IRQs disabled.
  20329. */
  20330. +#ifndef CONFIG_PREEMPT_RT_FULL
  20331. const_debug unsigned int sysctl_sched_nr_migrate = 32;
  20332. +#else
  20333. +const_debug unsigned int sysctl_sched_nr_migrate = 8;
  20334. +#endif
  20335. /*
  20336. * period over which we average the RT time consumption, measured
  20337. @@ -438,6 +442,7 @@
  20338. hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  20339. rq->hrtick_timer.function = hrtick;
  20340. + rq->hrtick_timer.irqsafe = 1;
  20341. }
  20342. #else /* CONFIG_SCHED_HRTICK */
  20343. static inline void hrtick_clear(struct rq *rq)
  20344. @@ -542,7 +547,7 @@
  20345. head->lastp = &node->next;
  20346. }
  20347. -void wake_up_q(struct wake_q_head *head)
  20348. +void __wake_up_q(struct wake_q_head *head, bool sleeper)
  20349. {
  20350. struct wake_q_node *node = head->first;
  20351. @@ -559,7 +564,10 @@
  20352. * wake_up_process() implies a wmb() to pair with the queueing
  20353. * in wake_q_add() so as not to miss wakeups.
  20354. */
  20355. - wake_up_process(task);
  20356. + if (sleeper)
  20357. + wake_up_lock_sleeper(task);
  20358. + else
  20359. + wake_up_process(task);
  20360. put_task_struct(task);
  20361. }
  20362. }
  20363. @@ -595,6 +603,38 @@
  20364. trace_sched_wake_idle_without_ipi(cpu);
  20365. }
  20366. +#ifdef CONFIG_PREEMPT_LAZY
  20367. +void resched_curr_lazy(struct rq *rq)
  20368. +{
  20369. + struct task_struct *curr = rq->curr;
  20370. + int cpu;
  20371. +
  20372. + if (!sched_feat(PREEMPT_LAZY)) {
  20373. + resched_curr(rq);
  20374. + return;
  20375. + }
  20376. +
  20377. + lockdep_assert_held(&rq->lock);
  20378. +
  20379. + if (test_tsk_need_resched(curr))
  20380. + return;
  20381. +
  20382. + if (test_tsk_need_resched_lazy(curr))
  20383. + return;
  20384. +
  20385. + set_tsk_need_resched_lazy(curr);
  20386. +
  20387. + cpu = cpu_of(rq);
  20388. + if (cpu == smp_processor_id())
  20389. + return;
  20390. +
  20391. + /* NEED_RESCHED_LAZY must be visible before we test polling */
  20392. + smp_mb();
  20393. + if (!tsk_is_polling(curr))
  20394. + smp_send_reschedule(cpu);
  20395. +}
  20396. +#endif
  20397. +
  20398. void resched_cpu(int cpu)
  20399. {
  20400. struct rq *rq = cpu_rq(cpu);
  20401. @@ -618,11 +658,14 @@
  20402. */
  20403. int get_nohz_timer_target(void)
  20404. {
  20405. - int i, cpu = smp_processor_id();
  20406. + int i, cpu;
  20407. struct sched_domain *sd;
  20408. + preempt_disable_rt();
  20409. + cpu = smp_processor_id();
  20410. +
  20411. if (!idle_cpu(cpu) && is_housekeeping_cpu(cpu))
  20412. - return cpu;
  20413. + goto preempt_en_rt;
  20414. rcu_read_lock();
  20415. for_each_domain(cpu, sd) {
  20416. @@ -641,6 +684,8 @@
  20417. cpu = housekeeping_any_cpu();
  20418. unlock:
  20419. rcu_read_unlock();
  20420. +preempt_en_rt:
  20421. + preempt_enable_rt();
  20422. return cpu;
  20423. }
  20424. /*
  20425. @@ -1174,6 +1219,11 @@
  20426. lockdep_assert_held(&p->pi_lock);
  20427. + if (__migrate_disabled(p)) {
  20428. + cpumask_copy(&p->cpus_allowed, new_mask);
  20429. + return;
  20430. + }
  20431. +
  20432. queued = task_on_rq_queued(p);
  20433. running = task_current(rq, p);
  20434. @@ -1196,6 +1246,84 @@
  20435. enqueue_task(rq, p, ENQUEUE_RESTORE);
  20436. }
  20437. +static DEFINE_PER_CPU(struct cpumask, sched_cpumasks);
  20438. +static DEFINE_MUTEX(sched_down_mutex);
  20439. +static cpumask_t sched_down_cpumask;
  20440. +
  20441. +void tell_sched_cpu_down_begin(int cpu)
  20442. +{
  20443. + mutex_lock(&sched_down_mutex);
  20444. + cpumask_set_cpu(cpu, &sched_down_cpumask);
  20445. + mutex_unlock(&sched_down_mutex);
  20446. +}
  20447. +
  20448. +void tell_sched_cpu_down_done(int cpu)
  20449. +{
  20450. + mutex_lock(&sched_down_mutex);
  20451. + cpumask_clear_cpu(cpu, &sched_down_cpumask);
  20452. + mutex_unlock(&sched_down_mutex);
  20453. +}
  20454. +
  20455. +/**
  20456. + * migrate_me - try to move the current task off this cpu
  20457. + *
  20458. + * Used by the pin_current_cpu() code to try to get tasks
  20459. + * to move off the current CPU as it is going down.
  20460. + * It will only move the task if the task isn't pinned to
  20461. + * the CPU (with migrate_disable, affinity or NO_SETAFFINITY)
  20462. + * and the task has to be in a RUNNING state. Otherwise the
  20463. + * movement of the task will wake it up (change its state
  20464. + * to running) when the task did not expect it.
  20465. + *
  20466. + * Returns 1 if it succeeded in moving the current task
  20467. + * 0 otherwise.
  20468. + */
  20469. +int migrate_me(void)
  20470. +{
  20471. + struct task_struct *p = current;
  20472. + struct migration_arg arg;
  20473. + struct cpumask *cpumask;
  20474. + struct cpumask *mask;
  20475. + unsigned long flags;
  20476. + unsigned int dest_cpu;
  20477. + struct rq *rq;
  20478. +
  20479. + /*
  20480. + * We can not migrate tasks bounded to a CPU or tasks not
  20481. + * running. The movement of the task will wake it up.
  20482. + */
  20483. + if (p->flags & PF_NO_SETAFFINITY || p->state)
  20484. + return 0;
  20485. +
  20486. + mutex_lock(&sched_down_mutex);
  20487. + rq = task_rq_lock(p, &flags);
  20488. +
  20489. + cpumask = this_cpu_ptr(&sched_cpumasks);
  20490. + mask = &p->cpus_allowed;
  20491. +
  20492. + cpumask_andnot(cpumask, mask, &sched_down_cpumask);
  20493. +
  20494. + if (!cpumask_weight(cpumask)) {
  20495. + /* It's only on this CPU? */
  20496. + task_rq_unlock(rq, p, &flags);
  20497. + mutex_unlock(&sched_down_mutex);
  20498. + return 0;
  20499. + }
  20500. +
  20501. + dest_cpu = cpumask_any_and(cpu_active_mask, cpumask);
  20502. +
  20503. + arg.task = p;
  20504. + arg.dest_cpu = dest_cpu;
  20505. +
  20506. + task_rq_unlock(rq, p, &flags);
  20507. +
  20508. + stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
  20509. + tlb_migrate_finish(p->mm);
  20510. + mutex_unlock(&sched_down_mutex);
  20511. +
  20512. + return 1;
  20513. +}
  20514. +
  20515. /*
  20516. * Change a given task's CPU affinity. Migrate the thread to a
  20517. * proper CPU and schedule it away if the CPU it's executing on
  20518. @@ -1235,7 +1363,7 @@
  20519. do_set_cpus_allowed(p, new_mask);
  20520. /* Can the task run on the task's current CPU? If so, we're done */
  20521. - if (cpumask_test_cpu(task_cpu(p), new_mask))
  20522. + if (cpumask_test_cpu(task_cpu(p), new_mask) || __migrate_disabled(p))
  20523. goto out;
  20524. dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
  20525. @@ -1411,6 +1539,18 @@
  20526. return ret;
  20527. }
  20528. +static bool check_task_state(struct task_struct *p, long match_state)
  20529. +{
  20530. + bool match = false;
  20531. +
  20532. + raw_spin_lock_irq(&p->pi_lock);
  20533. + if (p->state == match_state || p->saved_state == match_state)
  20534. + match = true;
  20535. + raw_spin_unlock_irq(&p->pi_lock);
  20536. +
  20537. + return match;
  20538. +}
  20539. +
  20540. /*
  20541. * wait_task_inactive - wait for a thread to unschedule.
  20542. *
  20543. @@ -1455,7 +1595,7 @@
  20544. * is actually now running somewhere else!
  20545. */
  20546. while (task_running(rq, p)) {
  20547. - if (match_state && unlikely(p->state != match_state))
  20548. + if (match_state && !check_task_state(p, match_state))
  20549. return 0;
  20550. cpu_relax();
  20551. }
  20552. @@ -1470,7 +1610,8 @@
  20553. running = task_running(rq, p);
  20554. queued = task_on_rq_queued(p);
  20555. ncsw = 0;
  20556. - if (!match_state || p->state == match_state)
  20557. + if (!match_state || p->state == match_state ||
  20558. + p->saved_state == match_state)
  20559. ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
  20560. task_rq_unlock(rq, p, &flags);
  20561. @@ -1627,7 +1768,7 @@
  20562. {
  20563. lockdep_assert_held(&p->pi_lock);
  20564. - if (p->nr_cpus_allowed > 1)
  20565. + if (tsk_nr_cpus_allowed(p) > 1)
  20566. cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
  20567. /*
  20568. @@ -1707,10 +1848,6 @@
  20569. {
  20570. activate_task(rq, p, en_flags);
  20571. p->on_rq = TASK_ON_RQ_QUEUED;
  20572. -
  20573. - /* if a worker is waking up, notify workqueue */
  20574. - if (p->flags & PF_WQ_WORKER)
  20575. - wq_worker_waking_up(p, cpu_of(rq));
  20576. }
  20577. /*
  20578. @@ -1937,8 +2074,27 @@
  20579. */
  20580. smp_mb__before_spinlock();
  20581. raw_spin_lock_irqsave(&p->pi_lock, flags);
  20582. - if (!(p->state & state))
  20583. + if (!(p->state & state)) {
  20584. + /*
  20585. + * The task might be running due to a spinlock sleeper
  20586. + * wakeup. Check the saved state and set it to running
  20587. + * if the wakeup condition is true.
  20588. + */
  20589. + if (!(wake_flags & WF_LOCK_SLEEPER)) {
  20590. + if (p->saved_state & state) {
  20591. + p->saved_state = TASK_RUNNING;
  20592. + success = 1;
  20593. + }
  20594. + }
  20595. goto out;
  20596. + }
  20597. +
  20598. + /*
  20599. + * If this is a regular wakeup, then we can unconditionally
  20600. + * clear the saved state of a "lock sleeper".
  20601. + */
  20602. + if (!(wake_flags & WF_LOCK_SLEEPER))
  20603. + p->saved_state = TASK_RUNNING;
  20604. trace_sched_waking(p);
  20605. @@ -2030,52 +2186,6 @@
  20606. }
  20607. /**
  20608. - * try_to_wake_up_local - try to wake up a local task with rq lock held
  20609. - * @p: the thread to be awakened
  20610. - *
  20611. - * Put @p on the run-queue if it's not already there. The caller must
  20612. - * ensure that this_rq() is locked, @p is bound to this_rq() and not
  20613. - * the current task.
  20614. - */
  20615. -static void try_to_wake_up_local(struct task_struct *p)
  20616. -{
  20617. - struct rq *rq = task_rq(p);
  20618. -
  20619. - if (WARN_ON_ONCE(rq != this_rq()) ||
  20620. - WARN_ON_ONCE(p == current))
  20621. - return;
  20622. -
  20623. - lockdep_assert_held(&rq->lock);
  20624. -
  20625. - if (!raw_spin_trylock(&p->pi_lock)) {
  20626. - /*
  20627. - * This is OK, because current is on_cpu, which avoids it being
  20628. - * picked for load-balance and preemption/IRQs are still
  20629. - * disabled avoiding further scheduler activity on it and we've
  20630. - * not yet picked a replacement task.
  20631. - */
  20632. - lockdep_unpin_lock(&rq->lock);
  20633. - raw_spin_unlock(&rq->lock);
  20634. - raw_spin_lock(&p->pi_lock);
  20635. - raw_spin_lock(&rq->lock);
  20636. - lockdep_pin_lock(&rq->lock);
  20637. - }
  20638. -
  20639. - if (!(p->state & TASK_NORMAL))
  20640. - goto out;
  20641. -
  20642. - trace_sched_waking(p);
  20643. -
  20644. - if (!task_on_rq_queued(p))
  20645. - ttwu_activate(rq, p, ENQUEUE_WAKEUP);
  20646. -
  20647. - ttwu_do_wakeup(rq, p, 0);
  20648. - ttwu_stat(p, smp_processor_id(), 0);
  20649. -out:
  20650. - raw_spin_unlock(&p->pi_lock);
  20651. -}
  20652. -
  20653. -/**
  20654. * wake_up_process - Wake up a specific process
  20655. * @p: The process to be woken up.
  20656. *
  20657. @@ -2093,6 +2203,18 @@
  20658. }
  20659. EXPORT_SYMBOL(wake_up_process);
  20660. +/**
  20661. + * wake_up_lock_sleeper - Wake up a specific process blocked on a "sleeping lock"
  20662. + * @p: The process to be woken up.
  20663. + *
  20664. + * Same as wake_up_process() above, but wake_flags=WF_LOCK_SLEEPER to indicate
  20665. + * the nature of the wakeup.
  20666. + */
  20667. +int wake_up_lock_sleeper(struct task_struct *p)
  20668. +{
  20669. + return try_to_wake_up(p, TASK_ALL, WF_LOCK_SLEEPER);
  20670. +}
  20671. +
  20672. int wake_up_state(struct task_struct *p, unsigned int state)
  20673. {
  20674. return try_to_wake_up(p, state, 0);
  20675. @@ -2279,6 +2401,9 @@
  20676. p->on_cpu = 0;
  20677. #endif
  20678. init_task_preempt_count(p);
  20679. +#ifdef CONFIG_HAVE_PREEMPT_LAZY
  20680. + task_thread_info(p)->preempt_lazy_count = 0;
  20681. +#endif
  20682. #ifdef CONFIG_SMP
  20683. plist_node_init(&p->pushable_tasks, MAX_PRIO);
  20684. RB_CLEAR_NODE(&p->pushable_dl_tasks);
  20685. @@ -2603,8 +2728,12 @@
  20686. finish_arch_post_lock_switch();
  20687. fire_sched_in_preempt_notifiers(current);
  20688. + /*
  20689. + * We use mmdrop_delayed() here so we don't have to do the
  20690. + * full __mmdrop() when we are the last user.
  20691. + */
  20692. if (mm)
  20693. - mmdrop(mm);
  20694. + mmdrop_delayed(mm);
  20695. if (unlikely(prev_state == TASK_DEAD)) {
  20696. if (prev->sched_class->task_dead)
  20697. prev->sched_class->task_dead(prev);
  20698. @@ -2935,16 +3064,6 @@
  20699. }
  20700. #endif
  20701. -notrace unsigned long get_parent_ip(unsigned long addr)
  20702. -{
  20703. - if (in_lock_functions(addr)) {
  20704. - addr = CALLER_ADDR2;
  20705. - if (in_lock_functions(addr))
  20706. - addr = CALLER_ADDR3;
  20707. - }
  20708. - return addr;
  20709. -}
  20710. -
  20711. #if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
  20712. defined(CONFIG_PREEMPT_TRACER))
  20713. @@ -2966,7 +3085,7 @@
  20714. PREEMPT_MASK - 10);
  20715. #endif
  20716. if (preempt_count() == val) {
  20717. - unsigned long ip = get_parent_ip(CALLER_ADDR1);
  20718. + unsigned long ip = get_lock_parent_ip();
  20719. #ifdef CONFIG_DEBUG_PREEMPT
  20720. current->preempt_disable_ip = ip;
  20721. #endif
  20722. @@ -2993,7 +3112,7 @@
  20723. #endif
  20724. if (preempt_count() == val)
  20725. - trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
  20726. + trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip());
  20727. __preempt_count_sub(val);
  20728. }
  20729. EXPORT_SYMBOL(preempt_count_sub);
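get_parent_ip() is removed above in favour of get_lock_parent_ip(), which is added to <linux/ftrace.h> elsewhere in this patch and also covers the immediate caller. Roughly, the helper walks outwards until the return address no longer points into a lock function:

/* Rough shape of get_lock_parent_ip(); see the ftrace.h hunk for the real one. */
static inline unsigned long get_lock_parent_ip(void)
{
	unsigned long addr = CALLER_ADDR0;

	if (!in_lock_functions(addr))
		return addr;
	addr = CALLER_ADDR1;
	if (!in_lock_functions(addr))
		return addr;
	return CALLER_ADDR2;
}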
  20730. @@ -3048,6 +3167,77 @@
  20731. schedstat_inc(this_rq(), sched_count);
  20732. }
  20733. +#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_SMP)
  20734. +
  20735. +void migrate_disable(void)
  20736. +{
  20737. + struct task_struct *p = current;
  20738. +
  20739. + if (in_atomic() || irqs_disabled()) {
  20740. +#ifdef CONFIG_SCHED_DEBUG
  20741. + p->migrate_disable_atomic++;
  20742. +#endif
  20743. + return;
  20744. + }
  20745. +
  20746. +#ifdef CONFIG_SCHED_DEBUG
  20747. + if (unlikely(p->migrate_disable_atomic)) {
  20748. + tracing_off();
  20749. + WARN_ON_ONCE(1);
  20750. + }
  20751. +#endif
  20752. +
  20753. + if (p->migrate_disable) {
  20754. + p->migrate_disable++;
  20755. + return;
  20756. + }
  20757. +
  20758. + preempt_disable();
  20759. + preempt_lazy_disable();
  20760. + pin_current_cpu();
  20761. + p->migrate_disable = 1;
  20762. + preempt_enable();
  20763. +}
  20764. +EXPORT_SYMBOL(migrate_disable);
  20765. +
  20766. +void migrate_enable(void)
  20767. +{
  20768. + struct task_struct *p = current;
  20769. +
  20770. + if (in_atomic() || irqs_disabled()) {
  20771. +#ifdef CONFIG_SCHED_DEBUG
  20772. + p->migrate_disable_atomic--;
  20773. +#endif
  20774. + return;
  20775. + }
  20776. +
  20777. +#ifdef CONFIG_SCHED_DEBUG
  20778. + if (unlikely(p->migrate_disable_atomic)) {
  20779. + tracing_off();
  20780. + WARN_ON_ONCE(1);
  20781. + }
  20782. +#endif
  20783. + WARN_ON_ONCE(p->migrate_disable <= 0);
  20784. +
  20785. + if (p->migrate_disable > 1) {
  20786. + p->migrate_disable--;
  20787. + return;
  20788. + }
  20789. +
  20790. + preempt_disable();
  20791. + /*
  20792. + * Clearing migrate_disable causes tsk_cpus_allowed to
  20793. + * show the task's original cpu affinity.
  20794. + */
  20795. + p->migrate_disable = 0;
  20796. +
  20797. + unpin_current_cpu();
  20798. + preempt_enable();
  20799. + preempt_lazy_enable();
  20800. +}
  20801. +EXPORT_SYMBOL(migrate_enable);
  20802. +#endif
  20803. +
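migrate_disable()/migrate_enable() pin the calling task to its current CPU without disabling preemption, so code that needs per-CPU stability may still block, for example on a spinlock that has become a sleeping lock on RT. A hypothetical usage sketch (all demo_* names are made up):

#include <linux/list.h>
#include <linux/percpu.h>
#include <linux/spinlock.h>

static DEFINE_PER_CPU(struct list_head, demo_list);
static DEFINE_SPINLOCK(demo_lock);		/* a sleeping "spinlock" on RT */

static void demo_add(struct list_head *item)
{
	struct list_head *head;

	migrate_disable();			/* stay on this CPU ...                  */
	head = this_cpu_ptr(&demo_list);	/* ... so the per-CPU pointer stays ours */
	spin_lock(&demo_lock);			/* may block on RT; preemption stays on  */
	list_add(item, head);
	spin_unlock(&demo_lock);
	migrate_enable();
}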
  20804. /*
  20805. * Pick up the highest-prio task:
  20806. */
  20807. @@ -3172,19 +3362,6 @@
  20808. } else {
  20809. deactivate_task(rq, prev, DEQUEUE_SLEEP);
  20810. prev->on_rq = 0;
  20811. -
  20812. - /*
  20813. - * If a worker went to sleep, notify and ask workqueue
  20814. - * whether it wants to wake up a task to maintain
  20815. - * concurrency.
  20816. - */
  20817. - if (prev->flags & PF_WQ_WORKER) {
  20818. - struct task_struct *to_wakeup;
  20819. -
  20820. - to_wakeup = wq_worker_sleeping(prev, cpu);
  20821. - if (to_wakeup)
  20822. - try_to_wake_up_local(to_wakeup);
  20823. - }
  20824. }
  20825. switch_count = &prev->nvcsw;
  20826. }
  20827. @@ -3194,6 +3371,7 @@
  20828. next = pick_next_task(rq, prev);
  20829. clear_tsk_need_resched(prev);
  20830. + clear_tsk_need_resched_lazy(prev);
  20831. clear_preempt_need_resched();
  20832. rq->clock_skip_update = 0;
  20833. @@ -3215,9 +3393,20 @@
  20834. static inline void sched_submit_work(struct task_struct *tsk)
  20835. {
  20836. - if (!tsk->state || tsk_is_pi_blocked(tsk))
  20837. + if (!tsk->state)
  20838. return;
  20839. /*
  20840. + * If a worker went to sleep, notify and ask workqueue whether
  20841. + * it wants to wake up a task to maintain concurrency.
  20842. + */
  20843. + if (tsk->flags & PF_WQ_WORKER)
  20844. + wq_worker_sleeping(tsk);
  20845. +
  20846. +
  20847. + if (tsk_is_pi_blocked(tsk))
  20848. + return;
  20849. +
  20850. + /*
  20851. * If we are going to sleep and we have plugged IO queued,
  20852. * make sure to submit it to avoid deadlocks.
  20853. */
  20854. @@ -3225,6 +3414,12 @@
  20855. blk_schedule_flush_plug(tsk);
  20856. }
  20857. +static void sched_update_worker(struct task_struct *tsk)
  20858. +{
  20859. + if (tsk->flags & PF_WQ_WORKER)
  20860. + wq_worker_running(tsk);
  20861. +}
  20862. +
  20863. asmlinkage __visible void __sched schedule(void)
  20864. {
  20865. struct task_struct *tsk = current;
  20866. @@ -3235,6 +3430,7 @@
  20867. __schedule(false);
  20868. sched_preempt_enable_no_resched();
  20869. } while (need_resched());
  20870. + sched_update_worker(tsk);
  20871. }
  20872. EXPORT_SYMBOL(schedule);
  20873. @@ -3283,6 +3479,30 @@
  20874. } while (need_resched());
  20875. }
  20876. +#ifdef CONFIG_PREEMPT_LAZY
  20877. +/*
  20878. + * If TIF_NEED_RESCHED is set then we allow to be scheduled away, since this is
  20879. + * set by a RT task. Otherwise we try to avoid being scheduled out as long as
  20880. + * the preempt_lazy_count counter is > 0.
  20881. + */
  20882. +static __always_inline int preemptible_lazy(void)
  20883. +{
  20884. + if (test_thread_flag(TIF_NEED_RESCHED))
  20885. + return 1;
  20886. + if (current_thread_info()->preempt_lazy_count)
  20887. + return 0;
  20888. + return 1;
  20889. +}
  20890. +
  20891. +#else
  20892. +
  20893. +static inline int preemptible_lazy(void)
  20894. +{
  20895. + return 1;
  20896. +}
  20897. +
  20898. +#endif
  20899. +
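preempt_lazy_count is the per-thread counter maintained by preempt_lazy_disable()/preempt_lazy_enable(), which this patch adds to <linux/preempt.h>. A rough sketch of that accounting is below; the real macros also re-check for a pending reschedule when the count drops back to zero:

/* Sketch only; see the preempt.h hunk of this patch for the real macros. */
#define preempt_lazy_count()	(current_thread_info()->preempt_lazy_count)

#define preempt_lazy_disable()	do { preempt_lazy_count()++; barrier(); } while (0)
#define preempt_lazy_enable()	do { barrier(); preempt_lazy_count()--; } while (0)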
  20900. #ifdef CONFIG_PREEMPT
  20901. /*
  20902. * this is the entry point to schedule() from in-kernel preemption
  20903. @@ -3297,6 +3517,8 @@
  20904. */
  20905. if (likely(!preemptible()))
  20906. return;
  20907. + if (!preemptible_lazy())
  20908. + return;
  20909. preempt_schedule_common();
  20910. }
  20911. @@ -3323,6 +3545,8 @@
  20912. if (likely(!preemptible()))
  20913. return;
  20914. + if (!preemptible_lazy())
  20915. + return;
  20916. do {
  20917. preempt_disable_notrace();
  20918. @@ -3332,7 +3556,16 @@
  20919. * an infinite recursion.
  20920. */
  20921. prev_ctx = exception_enter();
  20922. + /*
  20923. + * The add/subtract must not be traced by the function
  20924. + * tracer. But we still want to account for the
  20925. + * preempt off latency tracer. Since the _notrace versions
  20926. + * of add/subtract skip the accounting for latency tracer
  20927. + * we must force it manually.
  20928. + */
  20929. + start_critical_timings();
  20930. __schedule(true);
  20931. + stop_critical_timings();
  20932. exception_exit(prev_ctx);
  20933. preempt_enable_no_resched_notrace();
  20934. @@ -4676,6 +4909,7 @@
  20935. }
  20936. EXPORT_SYMBOL(__cond_resched_lock);
  20937. +#ifndef CONFIG_PREEMPT_RT_FULL
  20938. int __sched __cond_resched_softirq(void)
  20939. {
  20940. BUG_ON(!in_softirq());
  20941. @@ -4689,6 +4923,7 @@
  20942. return 0;
  20943. }
  20944. EXPORT_SYMBOL(__cond_resched_softirq);
  20945. +#endif
  20946. /**
  20947. * yield - yield the current processor to other threads.
  20948. @@ -5055,7 +5290,9 @@
  20949. /* Set the preempt count _outside_ the spinlocks! */
  20950. init_idle_preempt_count(idle, cpu);
  20951. -
  20952. +#ifdef CONFIG_HAVE_PREEMPT_LAZY
  20953. + task_thread_info(idle)->preempt_lazy_count = 0;
  20954. +#endif
  20955. /*
  20956. * The idle tasks have their own, simple scheduling class:
  20957. */
  20958. @@ -5196,6 +5433,8 @@
  20959. #endif /* CONFIG_NUMA_BALANCING */
  20960. #ifdef CONFIG_HOTPLUG_CPU
  20961. +static DEFINE_PER_CPU(struct mm_struct *, idle_last_mm);
  20962. +
  20963. /*
  20964. * Ensures that the idle task is using init_mm right before its cpu goes
  20965. * offline.
  20966. @@ -5210,7 +5449,11 @@
  20967. switch_mm(mm, &init_mm, current);
  20968. finish_arch_post_lock_switch();
  20969. }
  20970. - mmdrop(mm);
  20971. + /*
  20972. + * Defer the cleanup to an alive cpu. On RT we can neither
  20973. + * call mmdrop() nor mmdrop_delayed() from here.
  20974. + */
  20975. + per_cpu(idle_last_mm, smp_processor_id()) = mm;
  20976. }
  20977. /*
  20978. @@ -5583,6 +5826,10 @@
  20979. case CPU_DEAD:
  20980. calc_load_migrate(rq);
  20981. + if (per_cpu(idle_last_mm, cpu)) {
  20982. + mmdrop(per_cpu(idle_last_mm, cpu));
  20983. + per_cpu(idle_last_mm, cpu) = NULL;
  20984. + }
  20985. break;
  20986. #endif
  20987. }
  20988. @@ -7566,7 +7813,7 @@
  20989. #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
  20990. static inline int preempt_count_equals(int preempt_offset)
  20991. {
  20992. - int nested = preempt_count() + rcu_preempt_depth();
  20993. + int nested = preempt_count() + sched_rcu_preempt_depth();
  20994. return (nested == preempt_offset);
  20995. }
  20996. diff -Nur linux-4.4.62.orig/kernel/sched/cpudeadline.c linux-4.4.62/kernel/sched/cpudeadline.c
  20997. --- linux-4.4.62.orig/kernel/sched/cpudeadline.c 2017-04-18 07:15:37.000000000 +0200
  20998. +++ linux-4.4.62/kernel/sched/cpudeadline.c 2017-04-18 17:38:08.218650175 +0200
  20999. @@ -103,10 +103,10 @@
  21000. const struct sched_dl_entity *dl_se = &p->dl;
  21001. if (later_mask &&
  21002. - cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) {
  21003. + cpumask_and(later_mask, cp->free_cpus, tsk_cpus_allowed(p))) {
  21004. best_cpu = cpumask_any(later_mask);
  21005. goto out;
  21006. - } else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) &&
  21007. + } else if (cpumask_test_cpu(cpudl_maximum(cp), tsk_cpus_allowed(p)) &&
  21008. dl_time_before(dl_se->deadline, cp->elements[0].dl)) {
  21009. best_cpu = cpudl_maximum(cp);
  21010. if (later_mask)
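The p->cpus_allowed / p->nr_cpus_allowed users in the push/pull paths are switched to tsk_cpus_allowed()/tsk_nr_cpus_allowed(), which this patch adds to <linux/sched.h>. While a task sits in a migrate_disable() section the wrappers collapse its affinity to the CPU it is running on; a rough sketch, assuming a __migrate_disabled() helper that reports p->migrate_disable:

/* Rough shape of the wrappers; see the sched.h hunk for the real ones. */
static inline const struct cpumask *tsk_cpus_allowed(struct task_struct *p)
{
	if (__migrate_disabled(p))
		return cpumask_of(task_cpu(p));	/* pinned: only the current CPU */
	return &p->cpus_allowed;
}

static inline int tsk_nr_cpus_allowed(struct task_struct *p)
{
	return __migrate_disabled(p) ? 1 : p->nr_cpus_allowed;
}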
  21011. diff -Nur linux-4.4.62.orig/kernel/sched/cpupri.c linux-4.4.62/kernel/sched/cpupri.c
  21012. --- linux-4.4.62.orig/kernel/sched/cpupri.c 2017-04-18 07:15:37.000000000 +0200
  21013. +++ linux-4.4.62/kernel/sched/cpupri.c 2017-04-18 17:38:08.218650175 +0200
  21014. @@ -103,11 +103,11 @@
  21015. if (skip)
  21016. continue;
  21017. - if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
  21018. + if (cpumask_any_and(tsk_cpus_allowed(p), vec->mask) >= nr_cpu_ids)
  21019. continue;
  21020. if (lowest_mask) {
  21021. - cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
  21022. + cpumask_and(lowest_mask, tsk_cpus_allowed(p), vec->mask);
  21023. /*
  21024. * We have to ensure that we have at least one bit
  21025. diff -Nur linux-4.4.62.orig/kernel/sched/cputime.c linux-4.4.62/kernel/sched/cputime.c
  21026. --- linux-4.4.62.orig/kernel/sched/cputime.c 2017-04-18 07:15:37.000000000 +0200
  21027. +++ linux-4.4.62/kernel/sched/cputime.c 2017-04-18 17:38:08.218650175 +0200
  21028. @@ -685,7 +685,7 @@
  21029. {
  21030. unsigned long long delta = vtime_delta(tsk);
  21031. - WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_SLEEPING);
  21032. + WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE);
  21033. tsk->vtime_snap += delta;
  21034. /* CHECKME: always safe to convert nsecs to cputime? */
  21035. @@ -701,37 +701,37 @@
  21036. void vtime_account_system(struct task_struct *tsk)
  21037. {
  21038. - write_seqlock(&tsk->vtime_seqlock);
  21039. + write_seqcount_begin(&tsk->vtime_seqcount);
  21040. __vtime_account_system(tsk);
  21041. - write_sequnlock(&tsk->vtime_seqlock);
  21042. + write_seqcount_end(&tsk->vtime_seqcount);
  21043. }
  21044. void vtime_gen_account_irq_exit(struct task_struct *tsk)
  21045. {
  21046. - write_seqlock(&tsk->vtime_seqlock);
  21047. + write_seqcount_begin(&tsk->vtime_seqcount);
  21048. __vtime_account_system(tsk);
  21049. if (context_tracking_in_user())
  21050. tsk->vtime_snap_whence = VTIME_USER;
  21051. - write_sequnlock(&tsk->vtime_seqlock);
  21052. + write_seqcount_end(&tsk->vtime_seqcount);
  21053. }
  21054. void vtime_account_user(struct task_struct *tsk)
  21055. {
  21056. cputime_t delta_cpu;
  21057. - write_seqlock(&tsk->vtime_seqlock);
  21058. + write_seqcount_begin(&tsk->vtime_seqcount);
  21059. delta_cpu = get_vtime_delta(tsk);
  21060. tsk->vtime_snap_whence = VTIME_SYS;
  21061. account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
  21062. - write_sequnlock(&tsk->vtime_seqlock);
  21063. + write_seqcount_end(&tsk->vtime_seqcount);
  21064. }
  21065. void vtime_user_enter(struct task_struct *tsk)
  21066. {
  21067. - write_seqlock(&tsk->vtime_seqlock);
  21068. + write_seqcount_begin(&tsk->vtime_seqcount);
  21069. __vtime_account_system(tsk);
  21070. tsk->vtime_snap_whence = VTIME_USER;
  21071. - write_sequnlock(&tsk->vtime_seqlock);
  21072. + write_seqcount_end(&tsk->vtime_seqcount);
  21073. }
  21074. void vtime_guest_enter(struct task_struct *tsk)
  21075. @@ -743,19 +743,19 @@
  21076. * synchronization against the reader (task_gtime())
  21077. * that can thus safely catch up with a tickless delta.
  21078. */
  21079. - write_seqlock(&tsk->vtime_seqlock);
  21080. + write_seqcount_begin(&tsk->vtime_seqcount);
  21081. __vtime_account_system(tsk);
  21082. current->flags |= PF_VCPU;
  21083. - write_sequnlock(&tsk->vtime_seqlock);
  21084. + write_seqcount_end(&tsk->vtime_seqcount);
  21085. }
  21086. EXPORT_SYMBOL_GPL(vtime_guest_enter);
  21087. void vtime_guest_exit(struct task_struct *tsk)
  21088. {
  21089. - write_seqlock(&tsk->vtime_seqlock);
  21090. + write_seqcount_begin(&tsk->vtime_seqcount);
  21091. __vtime_account_system(tsk);
  21092. current->flags &= ~PF_VCPU;
  21093. - write_sequnlock(&tsk->vtime_seqlock);
  21094. + write_seqcount_end(&tsk->vtime_seqcount);
  21095. }
  21096. EXPORT_SYMBOL_GPL(vtime_guest_exit);
  21097. @@ -768,24 +768,26 @@
  21098. void arch_vtime_task_switch(struct task_struct *prev)
  21099. {
  21100. - write_seqlock(&prev->vtime_seqlock);
  21101. - prev->vtime_snap_whence = VTIME_SLEEPING;
  21102. - write_sequnlock(&prev->vtime_seqlock);
  21103. + write_seqcount_begin(&prev->vtime_seqcount);
  21104. + prev->vtime_snap_whence = VTIME_INACTIVE;
  21105. + write_seqcount_end(&prev->vtime_seqcount);
  21106. - write_seqlock(&current->vtime_seqlock);
  21107. + write_seqcount_begin(&current->vtime_seqcount);
  21108. current->vtime_snap_whence = VTIME_SYS;
  21109. current->vtime_snap = sched_clock_cpu(smp_processor_id());
  21110. - write_sequnlock(&current->vtime_seqlock);
  21111. + write_seqcount_end(&current->vtime_seqcount);
  21112. }
  21113. void vtime_init_idle(struct task_struct *t, int cpu)
  21114. {
  21115. unsigned long flags;
  21116. - write_seqlock_irqsave(&t->vtime_seqlock, flags);
  21117. + local_irq_save(flags);
  21118. + write_seqcount_begin(&t->vtime_seqcount);
  21119. t->vtime_snap_whence = VTIME_SYS;
  21120. t->vtime_snap = sched_clock_cpu(cpu);
  21121. - write_sequnlock_irqrestore(&t->vtime_seqlock, flags);
  21122. + write_seqcount_end(&t->vtime_seqcount);
  21123. + local_irq_restore(flags);
  21124. }
  21125. cputime_t task_gtime(struct task_struct *t)
  21126. @@ -797,13 +799,13 @@
  21127. return t->gtime;
  21128. do {
  21129. - seq = read_seqbegin(&t->vtime_seqlock);
  21130. + seq = read_seqcount_begin(&t->vtime_seqcount);
  21131. gtime = t->gtime;
  21132. if (t->flags & PF_VCPU)
  21133. gtime += vtime_delta(t);
  21134. - } while (read_seqretry(&t->vtime_seqlock, seq));
  21135. + } while (read_seqcount_retry(&t->vtime_seqcount, seq));
  21136. return gtime;
  21137. }
  21138. @@ -826,7 +828,7 @@
  21139. *udelta = 0;
  21140. *sdelta = 0;
  21141. - seq = read_seqbegin(&t->vtime_seqlock);
  21142. + seq = read_seqcount_begin(&t->vtime_seqcount);
  21143. if (u_dst)
  21144. *u_dst = *u_src;
  21145. @@ -834,7 +836,7 @@
  21146. *s_dst = *s_src;
  21147. /* Task is sleeping, nothing to add */
  21148. - if (t->vtime_snap_whence == VTIME_SLEEPING ||
  21149. + if (t->vtime_snap_whence == VTIME_INACTIVE ||
  21150. is_idle_task(t))
  21151. continue;
  21152. @@ -850,7 +852,7 @@
  21153. if (t->vtime_snap_whence == VTIME_SYS)
  21154. *sdelta = delta;
  21155. }
  21156. - } while (read_seqretry(&t->vtime_seqlock, seq));
  21157. + } while (read_seqcount_retry(&t->vtime_seqcount, seq));
  21158. }
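The vtime conversion above replaces the seqlock_t (whose embedded spinlock would become a sleeping lock on RT) with a bare seqcount_t plus explicit interrupt handling, keeping the usual publish/retry pattern. A self-contained example of that pattern, with made-up demo_* names:

#include <linux/seqlock.h>

static seqcount_t demo_seq = SEQCNT_ZERO(demo_seq);
static u64 demo_value;

static void demo_write(u64 v)		/* writers serialized, irqs off */
{
	write_seqcount_begin(&demo_seq);
	demo_value = v;
	write_seqcount_end(&demo_seq);
}

static u64 demo_read(void)
{
	unsigned int seq;
	u64 v;

	do {
		seq = read_seqcount_begin(&demo_seq);
		v = demo_value;
	} while (read_seqcount_retry(&demo_seq, seq));

	return v;
}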
  21159. diff -Nur linux-4.4.62.orig/kernel/sched/deadline.c linux-4.4.62/kernel/sched/deadline.c
  21160. --- linux-4.4.62.orig/kernel/sched/deadline.c 2017-04-18 07:15:37.000000000 +0200
  21161. +++ linux-4.4.62/kernel/sched/deadline.c 2017-04-18 17:38:08.218650175 +0200
  21162. @@ -134,7 +134,7 @@
  21163. {
  21164. struct task_struct *p = dl_task_of(dl_se);
  21165. - if (p->nr_cpus_allowed > 1)
  21166. + if (tsk_nr_cpus_allowed(p) > 1)
  21167. dl_rq->dl_nr_migratory++;
  21168. update_dl_migration(dl_rq);
  21169. @@ -144,7 +144,7 @@
  21170. {
  21171. struct task_struct *p = dl_task_of(dl_se);
  21172. - if (p->nr_cpus_allowed > 1)
  21173. + if (tsk_nr_cpus_allowed(p) > 1)
  21174. dl_rq->dl_nr_migratory--;
  21175. update_dl_migration(dl_rq);
  21176. @@ -697,6 +697,7 @@
  21177. hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  21178. timer->function = dl_task_timer;
  21179. + timer->irqsafe = 1;
  21180. }
  21181. static
  21182. @@ -989,7 +990,7 @@
  21183. enqueue_dl_entity(&p->dl, pi_se, flags);
  21184. - if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
  21185. + if (!task_current(rq, p) && tsk_nr_cpus_allowed(p) > 1)
  21186. enqueue_pushable_dl_task(rq, p);
  21187. }
  21188. @@ -1067,9 +1068,9 @@
  21189. * try to make it stay here, it might be important.
  21190. */
  21191. if (unlikely(dl_task(curr)) &&
  21192. - (curr->nr_cpus_allowed < 2 ||
  21193. + (tsk_nr_cpus_allowed(curr) < 2 ||
  21194. !dl_entity_preempt(&p->dl, &curr->dl)) &&
  21195. - (p->nr_cpus_allowed > 1)) {
  21196. + (tsk_nr_cpus_allowed(p) > 1)) {
  21197. int target = find_later_rq(p);
  21198. if (target != -1 &&
  21199. @@ -1090,7 +1091,7 @@
  21200. * Current can't be migrated, useless to reschedule,
  21201. * let's hope p can move out.
  21202. */
  21203. - if (rq->curr->nr_cpus_allowed == 1 ||
  21204. + if (tsk_nr_cpus_allowed(rq->curr) == 1 ||
  21205. cpudl_find(&rq->rd->cpudl, rq->curr, NULL) == -1)
  21206. return;
  21207. @@ -1098,7 +1099,7 @@
  21208. * p is migratable, so let's not schedule it and
  21209. * see if it is pushed or pulled somewhere else.
  21210. */
  21211. - if (p->nr_cpus_allowed != 1 &&
  21212. + if (tsk_nr_cpus_allowed(p) != 1 &&
  21213. cpudl_find(&rq->rd->cpudl, p, NULL) != -1)
  21214. return;
  21215. @@ -1212,7 +1213,7 @@
  21216. {
  21217. update_curr_dl(rq);
  21218. - if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1)
  21219. + if (on_dl_rq(&p->dl) && tsk_nr_cpus_allowed(p) > 1)
  21220. enqueue_pushable_dl_task(rq, p);
  21221. }
  21222. @@ -1335,7 +1336,7 @@
  21223. if (unlikely(!later_mask))
  21224. return -1;
  21225. - if (task->nr_cpus_allowed == 1)
  21226. + if (tsk_nr_cpus_allowed(task) == 1)
  21227. return -1;
  21228. /*
  21229. @@ -1441,7 +1442,7 @@
  21230. if (double_lock_balance(rq, later_rq)) {
  21231. if (unlikely(task_rq(task) != rq ||
  21232. !cpumask_test_cpu(later_rq->cpu,
  21233. - &task->cpus_allowed) ||
  21234. + tsk_cpus_allowed(task)) ||
  21235. task_running(rq, task) ||
  21236. !task_on_rq_queued(task))) {
  21237. double_unlock_balance(rq, later_rq);
  21238. @@ -1480,7 +1481,7 @@
  21239. BUG_ON(rq->cpu != task_cpu(p));
  21240. BUG_ON(task_current(rq, p));
  21241. - BUG_ON(p->nr_cpus_allowed <= 1);
  21242. + BUG_ON(tsk_nr_cpus_allowed(p) <= 1);
  21243. BUG_ON(!task_on_rq_queued(p));
  21244. BUG_ON(!dl_task(p));
  21245. @@ -1519,7 +1520,7 @@
  21246. */
  21247. if (dl_task(rq->curr) &&
  21248. dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) &&
  21249. - rq->curr->nr_cpus_allowed > 1) {
  21250. + tsk_nr_cpus_allowed(rq->curr) > 1) {
  21251. resched_curr(rq);
  21252. return 0;
  21253. }
  21254. @@ -1666,9 +1667,9 @@
  21255. {
  21256. if (!task_running(rq, p) &&
  21257. !test_tsk_need_resched(rq->curr) &&
  21258. - p->nr_cpus_allowed > 1 &&
  21259. + tsk_nr_cpus_allowed(p) > 1 &&
  21260. dl_task(rq->curr) &&
  21261. - (rq->curr->nr_cpus_allowed < 2 ||
  21262. + (tsk_nr_cpus_allowed(rq->curr) < 2 ||
  21263. !dl_entity_preempt(&p->dl, &rq->curr->dl))) {
  21264. push_dl_tasks(rq);
  21265. }
  21266. @@ -1769,7 +1770,7 @@
  21267. {
  21268. if (task_on_rq_queued(p) && rq->curr != p) {
  21269. #ifdef CONFIG_SMP
  21270. - if (p->nr_cpus_allowed > 1 && rq->dl.overloaded)
  21271. + if (tsk_nr_cpus_allowed(p) > 1 && rq->dl.overloaded)
  21272. queue_push_tasks(rq);
  21273. #endif
  21274. if (dl_task(rq->curr))
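The timer->irqsafe = 1 assignments mark the deadline timer (and, below, the RT period timer) so that on RT their callbacks keep running from hard interrupt context instead of being deferred to the hrtimer softirq thread; such callbacks must not take sleeping locks. A minimal usage sketch (the demo_* names are illustrative):

static enum hrtimer_restart demo_timer_fn(struct hrtimer *t)
{
	/* runs with interrupts off, even on PREEMPT_RT_FULL */
	return HRTIMER_NORESTART;
}

static void demo_timer_setup(struct hrtimer *t)
{
	hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	t->function = demo_timer_fn;
	t->irqsafe = 1;			/* field added by this patch */
}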
  21275. diff -Nur linux-4.4.62.orig/kernel/sched/debug.c linux-4.4.62/kernel/sched/debug.c
  21276. --- linux-4.4.62.orig/kernel/sched/debug.c 2017-04-18 07:15:37.000000000 +0200
  21277. +++ linux-4.4.62/kernel/sched/debug.c 2017-04-18 17:38:08.218650175 +0200
  21278. @@ -251,6 +251,9 @@
  21279. P(rt_throttled);
  21280. PN(rt_time);
  21281. PN(rt_runtime);
  21282. +#ifdef CONFIG_SMP
  21283. + P(rt_nr_migratory);
  21284. +#endif
  21285. #undef PN
  21286. #undef P
  21287. @@ -635,6 +638,10 @@
  21288. #endif
  21289. P(policy);
  21290. P(prio);
  21291. +#ifdef CONFIG_PREEMPT_RT_FULL
  21292. + P(migrate_disable);
  21293. +#endif
  21294. + P(nr_cpus_allowed);
  21295. #undef PN
  21296. #undef __PN
  21297. #undef P
  21298. diff -Nur linux-4.4.62.orig/kernel/sched/fair.c linux-4.4.62/kernel/sched/fair.c
  21299. --- linux-4.4.62.orig/kernel/sched/fair.c 2017-04-18 07:15:37.000000000 +0200
  21300. +++ linux-4.4.62/kernel/sched/fair.c 2017-04-18 17:38:08.218650175 +0200
  21301. @@ -3166,7 +3166,7 @@
  21302. ideal_runtime = sched_slice(cfs_rq, curr);
  21303. delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
  21304. if (delta_exec > ideal_runtime) {
  21305. - resched_curr(rq_of(cfs_rq));
  21306. + resched_curr_lazy(rq_of(cfs_rq));
  21307. /*
  21308. * The current task ran long enough, ensure it doesn't get
  21309. * re-elected due to buddy favours.
  21310. @@ -3190,7 +3190,7 @@
  21311. return;
  21312. if (delta > ideal_runtime)
  21313. - resched_curr(rq_of(cfs_rq));
  21314. + resched_curr_lazy(rq_of(cfs_rq));
  21315. }
  21316. static void
  21317. @@ -3330,7 +3330,7 @@
  21318. * validating it and just reschedule.
  21319. */
  21320. if (queued) {
  21321. - resched_curr(rq_of(cfs_rq));
  21322. + resched_curr_lazy(rq_of(cfs_rq));
  21323. return;
  21324. }
  21325. /*
  21326. @@ -3512,7 +3512,7 @@
  21327. * hierarchy can be throttled
  21328. */
  21329. if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr))
  21330. - resched_curr(rq_of(cfs_rq));
  21331. + resched_curr_lazy(rq_of(cfs_rq));
  21332. }
  21333. static __always_inline
  21334. @@ -4124,7 +4124,7 @@
  21335. if (delta < 0) {
  21336. if (rq->curr == p)
  21337. - resched_curr(rq);
  21338. + resched_curr_lazy(rq);
  21339. return;
  21340. }
  21341. hrtick_start(rq, delta);
  21342. @@ -5213,7 +5213,7 @@
  21343. return;
  21344. preempt:
  21345. - resched_curr(rq);
  21346. + resched_curr_lazy(rq);
  21347. /*
  21348. * Only set the backward buddy when the current task is still
  21349. * on the rq. This can happen when a wakeup gets interleaved
  21350. @@ -7964,7 +7964,7 @@
  21351. * 'current' within the tree based on its new key value.
  21352. */
  21353. swap(curr->vruntime, se->vruntime);
  21354. - resched_curr(rq);
  21355. + resched_curr_lazy(rq);
  21356. }
  21357. se->vruntime -= cfs_rq->min_vruntime;
  21358. @@ -7989,7 +7989,7 @@
  21359. */
  21360. if (rq->curr == p) {
  21361. if (p->prio > oldprio)
  21362. - resched_curr(rq);
  21363. + resched_curr_lazy(rq);
  21364. } else
  21365. check_preempt_curr(rq, p, 0);
  21366. }
  21367. diff -Nur linux-4.4.62.orig/kernel/sched/features.h linux-4.4.62/kernel/sched/features.h
  21368. --- linux-4.4.62.orig/kernel/sched/features.h 2017-04-18 07:15:37.000000000 +0200
  21369. +++ linux-4.4.62/kernel/sched/features.h 2017-04-18 17:38:08.218650175 +0200
  21370. @@ -45,11 +45,19 @@
  21371. */
  21372. SCHED_FEAT(NONTASK_CAPACITY, true)
  21373. +#ifdef CONFIG_PREEMPT_RT_FULL
  21374. +SCHED_FEAT(TTWU_QUEUE, false)
  21375. +# ifdef CONFIG_PREEMPT_LAZY
  21376. +SCHED_FEAT(PREEMPT_LAZY, true)
  21377. +# endif
  21378. +#else
  21379. +
  21380. /*
  21381. * Queue remote wakeups on the target CPU and process them
  21382. * using the scheduler IPI. Reduces rq->lock contention/bounces.
  21383. */
  21384. SCHED_FEAT(TTWU_QUEUE, true)
  21385. +#endif
  21386. #ifdef HAVE_RT_PUSH_IPI
  21387. /*
  21388. diff -Nur linux-4.4.62.orig/kernel/sched/Makefile linux-4.4.62/kernel/sched/Makefile
  21389. --- linux-4.4.62.orig/kernel/sched/Makefile 2017-04-18 07:15:37.000000000 +0200
  21390. +++ linux-4.4.62/kernel/sched/Makefile 2017-04-18 17:38:08.214650020 +0200
  21391. @@ -13,7 +13,7 @@
  21392. obj-y += core.o loadavg.o clock.o cputime.o
  21393. obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o
  21394. -obj-y += wait.o completion.o idle.o
  21395. +obj-y += wait.o swait.o swork.o completion.o idle.o
  21396. obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o
  21397. obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
  21398. obj-$(CONFIG_SCHEDSTATS) += stats.o
  21399. diff -Nur linux-4.4.62.orig/kernel/sched/rt.c linux-4.4.62/kernel/sched/rt.c
  21400. --- linux-4.4.62.orig/kernel/sched/rt.c 2017-04-18 07:15:37.000000000 +0200
  21401. +++ linux-4.4.62/kernel/sched/rt.c 2017-04-18 17:38:08.218650175 +0200
  21402. @@ -47,6 +47,7 @@
  21403. hrtimer_init(&rt_b->rt_period_timer,
  21404. CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  21405. + rt_b->rt_period_timer.irqsafe = 1;
  21406. rt_b->rt_period_timer.function = sched_rt_period_timer;
  21407. }
  21408. @@ -93,6 +94,7 @@
  21409. rt_rq->push_cpu = nr_cpu_ids;
  21410. raw_spin_lock_init(&rt_rq->push_lock);
  21411. init_irq_work(&rt_rq->push_work, push_irq_work_func);
  21412. + rt_rq->push_work.flags |= IRQ_WORK_HARD_IRQ;
  21413. #endif
  21414. #endif /* CONFIG_SMP */
  21415. /* We start in dequeued state, because no RT tasks are queued */
  21416. @@ -326,7 +328,7 @@
  21417. rt_rq = &rq_of_rt_rq(rt_rq)->rt;
  21418. rt_rq->rt_nr_total++;
  21419. - if (p->nr_cpus_allowed > 1)
  21420. + if (tsk_nr_cpus_allowed(p) > 1)
  21421. rt_rq->rt_nr_migratory++;
  21422. update_rt_migration(rt_rq);
  21423. @@ -343,7 +345,7 @@
  21424. rt_rq = &rq_of_rt_rq(rt_rq)->rt;
  21425. rt_rq->rt_nr_total--;
  21426. - if (p->nr_cpus_allowed > 1)
  21427. + if (tsk_nr_cpus_allowed(p) > 1)
  21428. rt_rq->rt_nr_migratory--;
  21429. update_rt_migration(rt_rq);
  21430. @@ -1262,7 +1264,7 @@
  21431. enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD);
  21432. - if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
  21433. + if (!task_current(rq, p) && tsk_nr_cpus_allowed(p) > 1)
  21434. enqueue_pushable_task(rq, p);
  21435. }
  21436. @@ -1351,7 +1353,7 @@
  21437. * will have to sort it out.
  21438. */
  21439. if (curr && unlikely(rt_task(curr)) &&
  21440. - (curr->nr_cpus_allowed < 2 ||
  21441. + (tsk_nr_cpus_allowed(curr) < 2 ||
  21442. curr->prio <= p->prio)) {
  21443. int target = find_lowest_rq(p);
  21444. @@ -1375,7 +1377,7 @@
  21445. * Current can't be migrated, useless to reschedule,
  21446. * let's hope p can move out.
  21447. */
  21448. - if (rq->curr->nr_cpus_allowed == 1 ||
  21449. + if (tsk_nr_cpus_allowed(rq->curr) == 1 ||
  21450. !cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
  21451. return;
  21452. @@ -1383,7 +1385,7 @@
  21453. * p is migratable, so let's not schedule it and
  21454. * see if it is pushed or pulled somewhere else.
  21455. */
  21456. - if (p->nr_cpus_allowed != 1
  21457. + if (tsk_nr_cpus_allowed(p) != 1
  21458. && cpupri_find(&rq->rd->cpupri, p, NULL))
  21459. return;
  21460. @@ -1517,7 +1519,7 @@
  21461. * The previous task needs to be made eligible for pushing
  21462. * if it is still active
  21463. */
  21464. - if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
  21465. + if (on_rt_rq(&p->rt) && tsk_nr_cpus_allowed(p) > 1)
  21466. enqueue_pushable_task(rq, p);
  21467. }
  21468. @@ -1567,7 +1569,7 @@
  21469. if (unlikely(!lowest_mask))
  21470. return -1;
  21471. - if (task->nr_cpus_allowed == 1)
  21472. + if (tsk_nr_cpus_allowed(task) == 1)
  21473. return -1; /* No other targets possible */
  21474. if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
  21475. @@ -1699,7 +1701,7 @@
  21476. BUG_ON(rq->cpu != task_cpu(p));
  21477. BUG_ON(task_current(rq, p));
  21478. - BUG_ON(p->nr_cpus_allowed <= 1);
  21479. + BUG_ON(tsk_nr_cpus_allowed(p) <= 1);
  21480. BUG_ON(!task_on_rq_queued(p));
  21481. BUG_ON(!rt_task(p));
  21482. @@ -2059,9 +2061,9 @@
  21483. {
  21484. if (!task_running(rq, p) &&
  21485. !test_tsk_need_resched(rq->curr) &&
  21486. - p->nr_cpus_allowed > 1 &&
  21487. + tsk_nr_cpus_allowed(p) > 1 &&
  21488. (dl_task(rq->curr) || rt_task(rq->curr)) &&
  21489. - (rq->curr->nr_cpus_allowed < 2 ||
  21490. + (tsk_nr_cpus_allowed(rq->curr) < 2 ||
  21491. rq->curr->prio <= p->prio))
  21492. push_rt_tasks(rq);
  21493. }
  21494. @@ -2134,7 +2136,7 @@
  21495. */
  21496. if (task_on_rq_queued(p) && rq->curr != p) {
  21497. #ifdef CONFIG_SMP
  21498. - if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
  21499. + if (tsk_nr_cpus_allowed(p) > 1 && rq->rt.overloaded)
  21500. queue_push_tasks(rq);
  21501. #endif /* CONFIG_SMP */
  21502. if (p->prio < rq->curr->prio)
  21503. diff -Nur linux-4.4.62.orig/kernel/sched/sched.h linux-4.4.62/kernel/sched/sched.h
  21504. --- linux-4.4.62.orig/kernel/sched/sched.h 2017-04-18 07:15:37.000000000 +0200
  21505. +++ linux-4.4.62/kernel/sched/sched.h 2017-04-18 17:38:08.218650175 +0200
  21506. @@ -1100,6 +1100,7 @@
  21507. #define WF_SYNC 0x01 /* waker goes to sleep after wakeup */
  21508. #define WF_FORK 0x02 /* child wakeup after fork */
  21509. #define WF_MIGRATED 0x4 /* internal use, task got migrated */
  21510. +#define WF_LOCK_SLEEPER 0x08 /* wakeup spinlock "sleeper" */
  21511. /*
  21512. * To aid in avoiding the subversion of "niceness" due to uneven distribution
  21513. @@ -1299,6 +1300,15 @@
  21514. extern void resched_curr(struct rq *rq);
  21515. extern void resched_cpu(int cpu);
  21516. +#ifdef CONFIG_PREEMPT_LAZY
  21517. +extern void resched_curr_lazy(struct rq *rq);
  21518. +#else
  21519. +static inline void resched_curr_lazy(struct rq *rq)
  21520. +{
  21521. + resched_curr(rq);
  21522. +}
  21523. +#endif
  21524. +
  21525. extern struct rt_bandwidth def_rt_bandwidth;
  21526. extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
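resched_curr_lazy() is the lazy counterpart used throughout the fair.c hunks above; with CONFIG_PREEMPT_LAZY it records a deferred TIF_NEED_RESCHED_LAZY request for SCHED_OTHER preemption, while RT tasks still trigger an immediate resched_curr(). A rough sketch of its core.c implementation (not verbatim; the polling/IPI details are simplified):

void resched_curr_lazy(struct rq *rq)
{
	struct task_struct *curr = rq->curr;
	int cpu;

	if (!sched_feat(PREEMPT_LAZY)) {
		resched_curr(rq);			/* feature off: old behaviour */
		return;
	}

	lockdep_assert_held(&rq->lock);

	if (test_tsk_need_resched(curr) || test_tsk_need_resched_lazy(curr))
		return;

	set_tsk_need_resched_lazy(curr);		/* deferred, "lazy" request */

	cpu = cpu_of(rq);
	if (cpu != smp_processor_id())
		smp_send_reschedule(cpu);
}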
  21527. diff -Nur linux-4.4.62.orig/kernel/sched/swait.c linux-4.4.62/kernel/sched/swait.c
  21528. --- linux-4.4.62.orig/kernel/sched/swait.c 1970-01-01 01:00:00.000000000 +0100
  21529. +++ linux-4.4.62/kernel/sched/swait.c 2017-04-18 17:38:08.218650175 +0200
  21530. @@ -0,0 +1,143 @@
  21531. +#include <linux/sched.h>
  21532. +#include <linux/swait.h>
  21533. +#include <linux/suspend.h>
  21534. +
  21535. +void __init_swait_queue_head(struct swait_queue_head *q, const char *name,
  21536. + struct lock_class_key *key)
  21537. +{
  21538. + raw_spin_lock_init(&q->lock);
  21539. + lockdep_set_class_and_name(&q->lock, key, name);
  21540. + INIT_LIST_HEAD(&q->task_list);
  21541. +}
  21542. +EXPORT_SYMBOL(__init_swait_queue_head);
  21543. +
  21544. +/*
  21545. + * The thing about the wake_up_state() return value; I think we can ignore it.
  21546. + *
  21547. + * If for some reason it would return 0, that means the previously waiting
  21548. + * task is already running, so it will observe condition true (or has already).
  21549. + */
  21550. +void swake_up_locked(struct swait_queue_head *q)
  21551. +{
  21552. + struct swait_queue *curr;
  21553. +
  21554. + if (list_empty(&q->task_list))
  21555. + return;
  21556. +
  21557. + curr = list_first_entry(&q->task_list, typeof(*curr), task_list);
  21558. + wake_up_process(curr->task);
  21559. + list_del_init(&curr->task_list);
  21560. +}
  21561. +EXPORT_SYMBOL(swake_up_locked);
  21562. +
  21563. +void swake_up_all_locked(struct swait_queue_head *q)
  21564. +{
  21565. + struct swait_queue *curr;
  21566. + int wakes = 0;
  21567. +
  21568. + while (!list_empty(&q->task_list)) {
  21569. +
  21570. + curr = list_first_entry(&q->task_list, typeof(*curr),
  21571. + task_list);
  21572. + wake_up_process(curr->task);
  21573. + list_del_init(&curr->task_list);
  21574. + wakes++;
  21575. + }
  21576. + if (pm_in_action)
  21577. + return;
  21578. + WARN(wakes > 2, "complete_all() with %d waiters\n", wakes);
  21579. +}
  21580. +EXPORT_SYMBOL(swake_up_all_locked);
  21581. +
  21582. +void swake_up(struct swait_queue_head *q)
  21583. +{
  21584. + unsigned long flags;
  21585. +
  21586. + if (!swait_active(q))
  21587. + return;
  21588. +
  21589. + raw_spin_lock_irqsave(&q->lock, flags);
  21590. + swake_up_locked(q);
  21591. + raw_spin_unlock_irqrestore(&q->lock, flags);
  21592. +}
  21593. +EXPORT_SYMBOL(swake_up);
  21594. +
  21595. +/*
  21596. + * Does not allow usage from IRQ disabled, since we must be able to
  21597. + * release IRQs to guarantee bounded hold time.
  21598. + */
  21599. +void swake_up_all(struct swait_queue_head *q)
  21600. +{
  21601. + struct swait_queue *curr;
  21602. + LIST_HEAD(tmp);
  21603. +
  21604. + if (!swait_active(q))
  21605. + return;
  21606. +
  21607. + raw_spin_lock_irq(&q->lock);
  21608. + list_splice_init(&q->task_list, &tmp);
  21609. + while (!list_empty(&tmp)) {
  21610. + curr = list_first_entry(&tmp, typeof(*curr), task_list);
  21611. +
  21612. + wake_up_state(curr->task, TASK_NORMAL);
  21613. + list_del_init(&curr->task_list);
  21614. +
  21615. + if (list_empty(&tmp))
  21616. + break;
  21617. +
  21618. + raw_spin_unlock_irq(&q->lock);
  21619. + raw_spin_lock_irq(&q->lock);
  21620. + }
  21621. + raw_spin_unlock_irq(&q->lock);
  21622. +}
  21623. +EXPORT_SYMBOL(swake_up_all);
  21624. +
  21625. +void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait)
  21626. +{
  21627. + wait->task = current;
  21628. + if (list_empty(&wait->task_list))
  21629. + list_add(&wait->task_list, &q->task_list);
  21630. +}
  21631. +
  21632. +void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state)
  21633. +{
  21634. + unsigned long flags;
  21635. +
  21636. + raw_spin_lock_irqsave(&q->lock, flags);
  21637. + __prepare_to_swait(q, wait);
  21638. + set_current_state(state);
  21639. + raw_spin_unlock_irqrestore(&q->lock, flags);
  21640. +}
  21641. +EXPORT_SYMBOL(prepare_to_swait);
  21642. +
  21643. +long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state)
  21644. +{
  21645. + if (signal_pending_state(state, current))
  21646. + return -ERESTARTSYS;
  21647. +
  21648. + prepare_to_swait(q, wait, state);
  21649. +
  21650. + return 0;
  21651. +}
  21652. +EXPORT_SYMBOL(prepare_to_swait_event);
  21653. +
  21654. +void __finish_swait(struct swait_queue_head *q, struct swait_queue *wait)
  21655. +{
  21656. + __set_current_state(TASK_RUNNING);
  21657. + if (!list_empty(&wait->task_list))
  21658. + list_del_init(&wait->task_list);
  21659. +}
  21660. +
  21661. +void finish_swait(struct swait_queue_head *q, struct swait_queue *wait)
  21662. +{
  21663. + unsigned long flags;
  21664. +
  21665. + __set_current_state(TASK_RUNNING);
  21666. +
  21667. + if (!list_empty_careful(&wait->task_list)) {
  21668. + raw_spin_lock_irqsave(&q->lock, flags);
  21669. + list_del_init(&wait->task_list);
  21670. + raw_spin_unlock_irqrestore(&q->lock, flags);
  21671. + }
  21672. +}
  21673. +EXPORT_SYMBOL(finish_swait);
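Simple wait queues use a raw spinlock and wake at most one task per swake_up(), which keeps the wakeup O(1) and usable from truly atomic context on RT. A minimal usage sketch, assuming the DECLARE_SWAIT_QUEUE_HEAD() and swait_event_interruptible() helpers from the patch's <linux/swait.h>; the demo_* names are illustrative and memory-ordering details are omitted:

#include <linux/swait.h>

static DECLARE_SWAIT_QUEUE_HEAD(demo_wq);
static bool demo_done;

static int demo_wait_thread(void *unused)	/* e.g. a kthread */
{
	swait_event_interruptible(demo_wq, demo_done);
	return 0;
}

static void demo_complete(void)			/* may run from irq context */
{
	demo_done = true;
	swake_up(&demo_wq);
}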
  21674. diff -Nur linux-4.4.62.orig/kernel/sched/swork.c linux-4.4.62/kernel/sched/swork.c
  21675. --- linux-4.4.62.orig/kernel/sched/swork.c 1970-01-01 01:00:00.000000000 +0100
  21676. +++ linux-4.4.62/kernel/sched/swork.c 2017-04-18 17:38:08.218650175 +0200
  21677. @@ -0,0 +1,173 @@
  21678. +/*
  21679. + * Copyright (C) 2014 BMW Car IT GmbH, Daniel Wagner daniel.wagner@bmw-carit.de
  21680. + *
  21681. + * Provides a framework for enqueuing callbacks from irq context
  21682. + * PREEMPT_RT_FULL safe. The callbacks are executed in kthread context.
  21683. + */
  21684. +
  21685. +#include <linux/swait.h>
  21686. +#include <linux/swork.h>
  21687. +#include <linux/kthread.h>
  21688. +#include <linux/slab.h>
  21689. +#include <linux/spinlock.h>
  21690. +#include <linux/export.h>
  21691. +
  21692. +#define SWORK_EVENT_PENDING (1 << 0)
  21693. +
  21694. +static DEFINE_MUTEX(worker_mutex);
  21695. +static struct sworker *glob_worker;
  21696. +
  21697. +struct sworker {
  21698. + struct list_head events;
  21699. + struct swait_queue_head wq;
  21700. +
  21701. + raw_spinlock_t lock;
  21702. +
  21703. + struct task_struct *task;
  21704. + int refs;
  21705. +};
  21706. +
  21707. +static bool swork_readable(struct sworker *worker)
  21708. +{
  21709. + bool r;
  21710. +
  21711. + if (kthread_should_stop())
  21712. + return true;
  21713. +
  21714. + raw_spin_lock_irq(&worker->lock);
  21715. + r = !list_empty(&worker->events);
  21716. + raw_spin_unlock_irq(&worker->lock);
  21717. +
  21718. + return r;
  21719. +}
  21720. +
  21721. +static int swork_kthread(void *arg)
  21722. +{
  21723. + struct sworker *worker = arg;
  21724. +
  21725. + for (;;) {
  21726. + swait_event_interruptible(worker->wq,
  21727. + swork_readable(worker));
  21728. + if (kthread_should_stop())
  21729. + break;
  21730. +
  21731. + raw_spin_lock_irq(&worker->lock);
  21732. + while (!list_empty(&worker->events)) {
  21733. + struct swork_event *sev;
  21734. +
  21735. + sev = list_first_entry(&worker->events,
  21736. + struct swork_event, item);
  21737. + list_del(&sev->item);
  21738. + raw_spin_unlock_irq(&worker->lock);
  21739. +
  21740. + WARN_ON_ONCE(!test_and_clear_bit(SWORK_EVENT_PENDING,
  21741. + &sev->flags));
  21742. + sev->func(sev);
  21743. + raw_spin_lock_irq(&worker->lock);
  21744. + }
  21745. + raw_spin_unlock_irq(&worker->lock);
  21746. + }
  21747. + return 0;
  21748. +}
  21749. +
  21750. +static struct sworker *swork_create(void)
  21751. +{
  21752. + struct sworker *worker;
  21753. +
  21754. + worker = kzalloc(sizeof(*worker), GFP_KERNEL);
  21755. + if (!worker)
  21756. + return ERR_PTR(-ENOMEM);
  21757. +
  21758. + INIT_LIST_HEAD(&worker->events);
  21759. + raw_spin_lock_init(&worker->lock);
  21760. + init_swait_queue_head(&worker->wq);
  21761. +
  21762. + worker->task = kthread_run(swork_kthread, worker, "kswork");
  21763. + if (IS_ERR(worker->task)) {
  21764. + kfree(worker);
  21765. + return ERR_PTR(-ENOMEM);
  21766. + }
  21767. +
  21768. + return worker;
  21769. +}
  21770. +
  21771. +static void swork_destroy(struct sworker *worker)
  21772. +{
  21773. + kthread_stop(worker->task);
  21774. +
  21775. + WARN_ON(!list_empty(&worker->events));
  21776. + kfree(worker);
  21777. +}
  21778. +
  21779. +/**
  21780. + * swork_queue - queue swork
  21781. + *
  21782. + * Returns %false if @work was already on a queue, %true otherwise.
  21783. + *
  21784. + * The work is queued and processed on a random CPU
  21785. + */
  21786. +bool swork_queue(struct swork_event *sev)
  21787. +{
  21788. + unsigned long flags;
  21789. +
  21790. + if (test_and_set_bit(SWORK_EVENT_PENDING, &sev->flags))
  21791. + return false;
  21792. +
  21793. + raw_spin_lock_irqsave(&glob_worker->lock, flags);
  21794. + list_add_tail(&sev->item, &glob_worker->events);
  21795. + raw_spin_unlock_irqrestore(&glob_worker->lock, flags);
  21796. +
  21797. + swake_up(&glob_worker->wq);
  21798. + return true;
  21799. +}
  21800. +EXPORT_SYMBOL_GPL(swork_queue);
  21801. +
  21802. +/**
  21803. + * swork_get - get an instance of the sworker
  21804. + *
  21805. + * Returns a negative error code if the initialization of the worker did not
  21806. + * work, %0 otherwise.
  21807. + *
  21808. + */
  21809. +int swork_get(void)
  21810. +{
  21811. + struct sworker *worker;
  21812. +
  21813. + mutex_lock(&worker_mutex);
  21814. + if (!glob_worker) {
  21815. + worker = swork_create();
  21816. + if (IS_ERR(worker)) {
  21817. + mutex_unlock(&worker_mutex);
  21818. + return -ENOMEM;
  21819. + }
  21820. +
  21821. + glob_worker = worker;
  21822. + }
  21823. +
  21824. + glob_worker->refs++;
  21825. + mutex_unlock(&worker_mutex);
  21826. +
  21827. + return 0;
  21828. +}
  21829. +EXPORT_SYMBOL_GPL(swork_get);
  21830. +
  21831. +/**
  21832. + * swork_put - puts an instance of the sworker
  21833. + *
  21834. + * Will destroy the sworker thread. This function must not be called until all
  21835. + * queued events have been completed.
  21836. + */
  21837. +void swork_put(void)
  21838. +{
  21839. + mutex_lock(&worker_mutex);
  21840. +
  21841. + glob_worker->refs--;
  21842. + if (glob_worker->refs > 0)
  21843. + goto out;
  21844. +
  21845. + swork_destroy(glob_worker);
  21846. + glob_worker = NULL;
  21847. +out:
  21848. + mutex_unlock(&worker_mutex);
  21849. +}
  21850. +EXPORT_SYMBOL_GPL(swork_put);
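swork gives interrupt and other atomic contexts a way to hand sleeping work to the global kswork thread. A usage sketch, assuming the struct swork_event and INIT_SWORK() declarations from the patch's <linux/swork.h>; the demo_* names are made up:

#include <linux/interrupt.h>
#include <linux/swork.h>

static struct swork_event demo_event;

static void demo_event_fn(struct swork_event *sev)
{
	/* runs in kswork kthread context and may sleep */
}

static int demo_init(void)
{
	int ret = swork_get();		/* make sure the kswork thread exists */

	if (ret)
		return ret;
	INIT_SWORK(&demo_event, demo_event_fn);
	return 0;
}

static irqreturn_t demo_irq(int irq, void *dev)
{
	swork_queue(&demo_event);	/* safe from hard interrupt context */
	return IRQ_HANDLED;
}

A matching swork_put() would be called on teardown, once no further events can be queued.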
  21851. diff -Nur linux-4.4.62.orig/kernel/signal.c linux-4.4.62/kernel/signal.c
  21852. --- linux-4.4.62.orig/kernel/signal.c 2017-04-18 07:15:37.000000000 +0200
  21853. +++ linux-4.4.62/kernel/signal.c 2017-04-18 17:38:08.222650330 +0200
  21854. @@ -14,6 +14,7 @@
  21855. #include <linux/export.h>
  21856. #include <linux/init.h>
  21857. #include <linux/sched.h>
  21858. +#include <linux/sched/rt.h>
  21859. #include <linux/fs.h>
  21860. #include <linux/tty.h>
  21861. #include <linux/binfmts.h>
  21862. @@ -352,13 +353,30 @@
  21863. return false;
  21864. }
  21865. +static inline struct sigqueue *get_task_cache(struct task_struct *t)
  21866. +{
  21867. + struct sigqueue *q = t->sigqueue_cache;
  21868. +
  21869. + if (cmpxchg(&t->sigqueue_cache, q, NULL) != q)
  21870. + return NULL;
  21871. + return q;
  21872. +}
  21873. +
  21874. +static inline int put_task_cache(struct task_struct *t, struct sigqueue *q)
  21875. +{
  21876. + if (cmpxchg(&t->sigqueue_cache, NULL, q) == NULL)
  21877. + return 0;
  21878. + return 1;
  21879. +}
  21880. +
  21881. /*
  21882. * allocate a new signal queue record
  21883. * - this may be called without locks if and only if t == current, otherwise an
  21884. * appropriate lock must be held to stop the target task from exiting
  21885. */
  21886. static struct sigqueue *
  21887. -__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit)
  21888. +__sigqueue_do_alloc(int sig, struct task_struct *t, gfp_t flags,
  21889. + int override_rlimit, int fromslab)
  21890. {
  21891. struct sigqueue *q = NULL;
  21892. struct user_struct *user;
  21893. @@ -375,7 +393,10 @@
  21894. if (override_rlimit ||
  21895. atomic_read(&user->sigpending) <=
  21896. task_rlimit(t, RLIMIT_SIGPENDING)) {
  21897. - q = kmem_cache_alloc(sigqueue_cachep, flags);
  21898. + if (!fromslab)
  21899. + q = get_task_cache(t);
  21900. + if (!q)
  21901. + q = kmem_cache_alloc(sigqueue_cachep, flags);
  21902. } else {
  21903. print_dropped_signal(sig);
  21904. }
  21905. @@ -392,6 +413,13 @@
  21906. return q;
  21907. }
  21908. +static struct sigqueue *
  21909. +__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags,
  21910. + int override_rlimit)
  21911. +{
  21912. + return __sigqueue_do_alloc(sig, t, flags, override_rlimit, 0);
  21913. +}
  21914. +
  21915. static void __sigqueue_free(struct sigqueue *q)
  21916. {
  21917. if (q->flags & SIGQUEUE_PREALLOC)
  21918. @@ -401,6 +429,21 @@
  21919. kmem_cache_free(sigqueue_cachep, q);
  21920. }
  21921. +static void sigqueue_free_current(struct sigqueue *q)
  21922. +{
  21923. + struct user_struct *up;
  21924. +
  21925. + if (q->flags & SIGQUEUE_PREALLOC)
  21926. + return;
  21927. +
  21928. + up = q->user;
  21929. + if (rt_prio(current->normal_prio) && !put_task_cache(current, q)) {
  21930. + atomic_dec(&up->sigpending);
  21931. + free_uid(up);
  21932. + } else
  21933. + __sigqueue_free(q);
  21934. +}
  21935. +
  21936. void flush_sigqueue(struct sigpending *queue)
  21937. {
  21938. struct sigqueue *q;
  21939. @@ -414,6 +457,21 @@
  21940. }
  21941. /*
  21942. + * Called from __exit_signal. Flush tsk->pending and
  21943. + * tsk->sigqueue_cache
  21944. + */
  21945. +void flush_task_sigqueue(struct task_struct *tsk)
  21946. +{
  21947. + struct sigqueue *q;
  21948. +
  21949. + flush_sigqueue(&tsk->pending);
  21950. +
  21951. + q = get_task_cache(tsk);
  21952. + if (q)
  21953. + kmem_cache_free(sigqueue_cachep, q);
  21954. +}
  21955. +
  21956. +/*
  21957. * Flush all pending signals for this kthread.
  21958. */
  21959. void flush_signals(struct task_struct *t)
  21960. @@ -525,7 +583,7 @@
  21961. still_pending:
  21962. list_del_init(&first->list);
  21963. copy_siginfo(info, &first->info);
  21964. - __sigqueue_free(first);
  21965. + sigqueue_free_current(first);
  21966. } else {
  21967. /*
  21968. * Ok, it wasn't in the queue. This must be
  21969. @@ -560,6 +618,8 @@
  21970. {
  21971. int signr;
  21972. + WARN_ON_ONCE(tsk != current);
  21973. +
  21974. /* We only dequeue private signals from ourselves, we don't let
  21975. * signalfd steal them
  21976. */
  21977. @@ -1156,8 +1216,8 @@
  21978. * We don't want to have recursive SIGSEGV's etc, for example,
  21979. * that is why we also clear SIGNAL_UNKILLABLE.
  21980. */
  21981. -int
  21982. -force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
  21983. +static int
  21984. +do_force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
  21985. {
  21986. unsigned long int flags;
  21987. int ret, blocked, ignored;
  21988. @@ -1182,6 +1242,39 @@
  21989. return ret;
  21990. }
  21991. +int force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
  21992. +{
  21993. +/*
  21994. + * On some archs, PREEMPT_RT has to delay sending a signal from a trap
  21995. + * since it can not enable preemption, and the signal code's spin_locks
  21996. + * turn into mutexes. Instead, it must set TIF_NOTIFY_RESUME which will
  21997. + * send the signal on exit of the trap.
  21998. + */
  21999. +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
  22000. + if (in_atomic()) {
  22001. + if (WARN_ON_ONCE(t != current))
  22002. + return 0;
  22003. + if (WARN_ON_ONCE(t->forced_info.si_signo))
  22004. + return 0;
  22005. +
  22006. + if (is_si_special(info)) {
  22007. + WARN_ON_ONCE(info != SEND_SIG_PRIV);
  22008. + t->forced_info.si_signo = sig;
  22009. + t->forced_info.si_errno = 0;
  22010. + t->forced_info.si_code = SI_KERNEL;
  22011. + t->forced_info.si_pid = 0;
  22012. + t->forced_info.si_uid = 0;
  22013. + } else {
  22014. + t->forced_info = *info;
  22015. + }
  22016. +
  22017. + set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
  22018. + return 0;
  22019. + }
  22020. +#endif
  22021. + return do_force_sig_info(sig, info, t);
  22022. +}
  22023. +
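The matching consumer of forced_info lives in the architecture's exit-to-user path: once TIF_NOTIFY_RESUME is handled in a context that may sleep, the stashed signal is delivered. Roughly, the fragment the patch adds there looks like this (shown for illustration; see the arch hunks for the real code):

#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
	if (unlikely(current->forced_info.si_signo)) {
		struct task_struct *t = current;

		force_sig_info(t->forced_info.si_signo, &t->forced_info, t);
		t->forced_info.si_signo = 0;
	}
#endif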
  22024. /*
  22025. * Nuke all other threads in the group.
  22026. */
  22027. @@ -1216,12 +1309,12 @@
  22028. * Disable interrupts early to avoid deadlocks.
  22029. * See rcu_read_unlock() comment header for details.
  22030. */
  22031. - local_irq_save(*flags);
  22032. + local_irq_save_nort(*flags);
  22033. rcu_read_lock();
  22034. sighand = rcu_dereference(tsk->sighand);
  22035. if (unlikely(sighand == NULL)) {
  22036. rcu_read_unlock();
  22037. - local_irq_restore(*flags);
  22038. + local_irq_restore_nort(*flags);
  22039. break;
  22040. }
  22041. /*
  22042. @@ -1242,7 +1335,7 @@
  22043. }
  22044. spin_unlock(&sighand->siglock);
  22045. rcu_read_unlock();
  22046. - local_irq_restore(*flags);
  22047. + local_irq_restore_nort(*flags);
  22048. }
  22049. return sighand;
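The local_irq_save_nort()/local_irq_restore_nort() pair added elsewhere in this patch keeps interrupts disabled on mainline but degrades to (almost) no-ops on PREEMPT_RT_FULL, where the sighand lock is a sleeping lock and must not be taken with interrupts hard-disabled. A rough sketch of the shape of those macros; the exact RT-side definitions in the patch may differ:

#ifdef CONFIG_PREEMPT_RT_FULL
# define local_irq_save_nort(flags)	local_save_flags(flags)	/* irqs stay enabled */
# define local_irq_restore_nort(flags)	(void)(flags)
#else
# define local_irq_save_nort(flags)	local_irq_save(flags)
# define local_irq_restore_nort(flags)	local_irq_restore(flags)
#endif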
  22050. @@ -1485,7 +1578,8 @@
  22051. */
  22052. struct sigqueue *sigqueue_alloc(void)
  22053. {
  22054. - struct sigqueue *q = __sigqueue_alloc(-1, current, GFP_KERNEL, 0);
  22055. + /* Preallocated sigqueue objects always from the slabcache ! */
  22056. + struct sigqueue *q = __sigqueue_do_alloc(-1, current, GFP_KERNEL, 0, 1);
  22057. if (q)
  22058. q->flags |= SIGQUEUE_PREALLOC;
  22059. @@ -1846,15 +1940,7 @@
  22060. if (gstop_done && ptrace_reparented(current))
  22061. do_notify_parent_cldstop(current, false, why);
  22062. - /*
  22063. - * Don't want to allow preemption here, because
  22064. - * sys_ptrace() needs this task to be inactive.
  22065. - *
  22066. - * XXX: implement read_unlock_no_resched().
  22067. - */
  22068. - preempt_disable();
  22069. read_unlock(&tasklist_lock);
  22070. - preempt_enable_no_resched();
  22071. freezable_schedule();
  22072. } else {
  22073. /*
  22074. diff -Nur linux-4.4.62.orig/kernel/softirq.c linux-4.4.62/kernel/softirq.c
  22075. --- linux-4.4.62.orig/kernel/softirq.c 2017-04-18 07:15:37.000000000 +0200
  22076. +++ linux-4.4.62/kernel/softirq.c 2017-04-18 17:38:08.222650330 +0200
  22077. @@ -21,10 +21,12 @@
  22078. #include <linux/freezer.h>
  22079. #include <linux/kthread.h>
  22080. #include <linux/rcupdate.h>
  22081. +#include <linux/delay.h>
  22082. #include <linux/ftrace.h>
  22083. #include <linux/smp.h>
  22084. #include <linux/smpboot.h>
  22085. #include <linux/tick.h>
  22086. +#include <linux/locallock.h>
  22087. #include <linux/irq.h>
  22088. #define CREATE_TRACE_POINTS
  22089. @@ -56,12 +58,108 @@
  22090. static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
  22091. DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
  22092. +#ifdef CONFIG_PREEMPT_RT_FULL
  22093. +#define TIMER_SOFTIRQS ((1 << TIMER_SOFTIRQ) | (1 << HRTIMER_SOFTIRQ))
  22094. +DEFINE_PER_CPU(struct task_struct *, ktimer_softirqd);
  22095. +#endif
  22096. const char * const softirq_to_name[NR_SOFTIRQS] = {
  22097. "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
  22098. "TASKLET", "SCHED", "HRTIMER", "RCU"
  22099. };
  22100. +#ifdef CONFIG_NO_HZ_COMMON
  22101. +# ifdef CONFIG_PREEMPT_RT_FULL
  22102. +
  22103. +struct softirq_runner {
  22104. + struct task_struct *runner[NR_SOFTIRQS];
  22105. +};
  22106. +
  22107. +static DEFINE_PER_CPU(struct softirq_runner, softirq_runners);
  22108. +
  22109. +static inline void softirq_set_runner(unsigned int sirq)
  22110. +{
  22111. + struct softirq_runner *sr = this_cpu_ptr(&softirq_runners);
  22112. +
  22113. + sr->runner[sirq] = current;
  22114. +}
  22115. +
  22116. +static inline void softirq_clr_runner(unsigned int sirq)
  22117. +{
  22118. + struct softirq_runner *sr = this_cpu_ptr(&softirq_runners);
  22119. +
  22120. + sr->runner[sirq] = NULL;
  22121. +}
  22122. +
  22123. +/*
  22124. + * On preempt-rt a softirq running context might be blocked on a
  22125. + * lock. There might be no other runnable task on this CPU because the
  22126. + * lock owner runs on some other CPU. So we have to go into idle with
  22127. + * the pending bit set. Therefore we need to check this, otherwise we
  22128. + * warn about false positives, which confuses users and defeats the
  22129. + * whole purpose of this test.
  22130. + *
  22131. + * This code is called with interrupts disabled.
  22132. + */
  22133. +void softirq_check_pending_idle(void)
  22134. +{
  22135. + static int rate_limit;
  22136. + struct softirq_runner *sr = this_cpu_ptr(&softirq_runners);
  22137. + u32 warnpending;
  22138. + int i;
  22139. +
  22140. + if (rate_limit >= 10)
  22141. + return;
  22142. +
  22143. + warnpending = local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK;
  22144. + for (i = 0; i < NR_SOFTIRQS; i++) {
  22145. + struct task_struct *tsk = sr->runner[i];
  22146. +
  22147. + /*
  22148. + * The wakeup code in rtmutex.c wakes up the task
  22149. + * _before_ it sets pi_blocked_on to NULL under
  22150. + * tsk->pi_lock. So we need to check for both: state
  22151. + * and pi_blocked_on.
  22152. + */
  22153. + if (tsk) {
  22154. + raw_spin_lock(&tsk->pi_lock);
  22155. + if (tsk->pi_blocked_on || tsk->state == TASK_RUNNING) {
  22156. + /* Clear all bits pending in that task */
  22157. + warnpending &= ~(tsk->softirqs_raised);
  22158. + warnpending &= ~(1 << i);
  22159. + }
  22160. + raw_spin_unlock(&tsk->pi_lock);
  22161. + }
  22162. + }
  22163. +
  22164. + if (warnpending) {
  22165. + printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
  22166. + warnpending);
  22167. + rate_limit++;
  22168. + }
  22169. +}
  22170. +# else
  22171. +/*
  22172. + * On !PREEMPT_RT we just printk rate limited:
  22173. + */
  22174. +void softirq_check_pending_idle(void)
  22175. +{
  22176. + static int rate_limit;
  22177. +
  22178. + if (rate_limit < 10 &&
  22179. + (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
  22180. + printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
  22181. + local_softirq_pending());
  22182. + rate_limit++;
  22183. + }
  22184. +}
  22185. +# endif
  22186. +
  22187. +#else /* !CONFIG_NO_HZ_COMMON */
  22188. +static inline void softirq_set_runner(unsigned int sirq) { }
  22189. +static inline void softirq_clr_runner(unsigned int sirq) { }
  22190. +#endif
  22191. +
  22192. /*
  22193. * we cannot loop indefinitely here to avoid userspace starvation,
  22194. * but we also don't want to introduce a worst case 1/HZ latency
  22195. @@ -77,6 +175,79 @@
  22196. wake_up_process(tsk);
  22197. }
  22198. +#ifdef CONFIG_PREEMPT_RT_FULL
  22199. +static void wakeup_timer_softirqd(void)
  22200. +{
  22201. + /* Interrupts are disabled: no need to stop preemption */
  22202. + struct task_struct *tsk = __this_cpu_read(ktimer_softirqd);
  22203. +
  22204. + if (tsk && tsk->state != TASK_RUNNING)
  22205. + wake_up_process(tsk);
  22206. +}
  22207. +#endif
  22208. +
  22209. +static void handle_softirq(unsigned int vec_nr)
  22210. +{
  22211. + struct softirq_action *h = softirq_vec + vec_nr;
  22212. + int prev_count;
  22213. +
  22214. + prev_count = preempt_count();
  22215. +
  22216. + kstat_incr_softirqs_this_cpu(vec_nr);
  22217. +
  22218. + trace_softirq_entry(vec_nr);
  22219. + h->action(h);
  22220. + trace_softirq_exit(vec_nr);
  22221. + if (unlikely(prev_count != preempt_count())) {
  22222. + pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
  22223. + vec_nr, softirq_to_name[vec_nr], h->action,
  22224. + prev_count, preempt_count());
  22225. + preempt_count_set(prev_count);
  22226. + }
  22227. +}
  22228. +
  22229. +#ifndef CONFIG_PREEMPT_RT_FULL
  22230. +static inline int ksoftirqd_softirq_pending(void)
  22231. +{
  22232. + return local_softirq_pending();
  22233. +}
  22234. +
  22235. +static void handle_pending_softirqs(u32 pending)
  22236. +{
  22237. + struct softirq_action *h = softirq_vec;
  22238. + int softirq_bit;
  22239. +
  22240. + local_irq_enable();
  22241. +
  22242. + h = softirq_vec;
  22243. +
  22244. + while ((softirq_bit = ffs(pending))) {
  22245. + unsigned int vec_nr;
  22246. +
  22247. + h += softirq_bit - 1;
  22248. + vec_nr = h - softirq_vec;
  22249. + handle_softirq(vec_nr);
  22250. +
  22251. + h++;
  22252. + pending >>= softirq_bit;
  22253. + }
  22254. +
  22255. + rcu_bh_qs();
  22256. + local_irq_disable();
  22257. +}
  22258. +
  22259. +static void run_ksoftirqd(unsigned int cpu)
  22260. +{
  22261. + local_irq_disable();
  22262. + if (ksoftirqd_softirq_pending()) {
  22263. + __do_softirq();
  22264. + local_irq_enable();
  22265. + cond_resched_rcu_qs();
  22266. + return;
  22267. + }
  22268. + local_irq_enable();
  22269. +}
  22270. +
  22271. /*
  22272. * preempt_count and SOFTIRQ_OFFSET usage:
  22273. * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
  22274. @@ -116,9 +287,9 @@
  22275. if (preempt_count() == cnt) {
  22276. #ifdef CONFIG_DEBUG_PREEMPT
  22277. - current->preempt_disable_ip = get_parent_ip(CALLER_ADDR1);
  22278. + current->preempt_disable_ip = get_lock_parent_ip();
  22279. #endif
  22280. - trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
  22281. + trace_preempt_off(CALLER_ADDR0, get_lock_parent_ip());
  22282. }
  22283. }
  22284. EXPORT_SYMBOL(__local_bh_disable_ip);
  22285. @@ -232,10 +403,8 @@
  22286. unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
  22287. unsigned long old_flags = current->flags;
  22288. int max_restart = MAX_SOFTIRQ_RESTART;
  22289. - struct softirq_action *h;
  22290. bool in_hardirq;
  22291. __u32 pending;
  22292. - int softirq_bit;
  22293. /*
  22294. * Mask out PF_MEMALLOC as current task context is borrowed for the
  22295. @@ -254,36 +423,7 @@
  22296. /* Reset the pending bitmask before enabling irqs */
  22297. set_softirq_pending(0);
  22298. - local_irq_enable();
  22299. -
  22300. - h = softirq_vec;
  22301. -
  22302. - while ((softirq_bit = ffs(pending))) {
  22303. - unsigned int vec_nr;
  22304. - int prev_count;
  22305. -
  22306. - h += softirq_bit - 1;
  22307. -
  22308. - vec_nr = h - softirq_vec;
  22309. - prev_count = preempt_count();
  22310. -
  22311. - kstat_incr_softirqs_this_cpu(vec_nr);
  22312. -
  22313. - trace_softirq_entry(vec_nr);
  22314. - h->action(h);
  22315. - trace_softirq_exit(vec_nr);
  22316. - if (unlikely(prev_count != preempt_count())) {
  22317. - pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
  22318. - vec_nr, softirq_to_name[vec_nr], h->action,
  22319. - prev_count, preempt_count());
  22320. - preempt_count_set(prev_count);
  22321. - }
  22322. - h++;
  22323. - pending >>= softirq_bit;
  22324. - }
  22325. -
  22326. - rcu_bh_qs();
  22327. - local_irq_disable();
  22328. + handle_pending_softirqs(pending);
  22329. pending = local_softirq_pending();
  22330. if (pending) {
  22331. @@ -320,6 +460,310 @@
  22332. }
  22333. /*
  22334. + * This function must run with irqs disabled!
  22335. + */
  22336. +void raise_softirq_irqoff(unsigned int nr)
  22337. +{
  22338. + __raise_softirq_irqoff(nr);
  22339. +
  22340. + /*
  22341. + * If we're in an interrupt or softirq, we're done
  22342. + * (this also catches softirq-disabled code). We will
  22343. + * actually run the softirq once we return from
  22344. + * the irq or softirq.
  22345. + *
  22346. + * Otherwise we wake up ksoftirqd to make sure we
  22347. + * schedule the softirq soon.
  22348. + */
  22349. + if (!in_interrupt())
  22350. + wakeup_softirqd();
  22351. +}
  22352. +
  22353. +void __raise_softirq_irqoff(unsigned int nr)
  22354. +{
  22355. + trace_softirq_raise(nr);
  22356. + or_softirq_pending(1UL << nr);
  22357. +}
  22358. +
  22359. +static inline void local_bh_disable_nort(void) { local_bh_disable(); }
  22360. +static inline void _local_bh_enable_nort(void) { _local_bh_enable(); }
  22361. +static void ksoftirqd_set_sched_params(unsigned int cpu) { }
  22362. +
  22363. +#else /* !PREEMPT_RT_FULL */
  22364. +
  22365. +/*
  22366. + * On RT we serialize softirq execution with a cpu local lock per softirq
  22367. + */
  22368. +static DEFINE_PER_CPU(struct local_irq_lock [NR_SOFTIRQS], local_softirq_locks);
  22369. +
  22370. +void __init softirq_early_init(void)
  22371. +{
  22372. + int i;
  22373. +
  22374. + for (i = 0; i < NR_SOFTIRQS; i++)
  22375. + local_irq_lock_init(local_softirq_locks[i]);
  22376. +}
  22377. +
  22378. +static void lock_softirq(int which)
  22379. +{
  22380. + local_lock(local_softirq_locks[which]);
  22381. +}
  22382. +
  22383. +static void unlock_softirq(int which)
  22384. +{
  22385. + local_unlock(local_softirq_locks[which]);
  22386. +}
  22387. +
  22388. +static void do_single_softirq(int which)
  22389. +{
  22390. + unsigned long old_flags = current->flags;
  22391. +
  22392. + current->flags &= ~PF_MEMALLOC;
  22393. + vtime_account_irq_enter(current);
  22394. + current->flags |= PF_IN_SOFTIRQ;
  22395. + lockdep_softirq_enter();
  22396. + local_irq_enable();
  22397. + handle_softirq(which);
  22398. + local_irq_disable();
  22399. + lockdep_softirq_exit();
  22400. + current->flags &= ~PF_IN_SOFTIRQ;
  22401. + vtime_account_irq_enter(current);
  22402. + tsk_restore_flags(current, old_flags, PF_MEMALLOC);
  22403. +}
  22404. +
  22405. +/*
  22406. + * Called with interrupts disabled. Process softirqs which were raised
  22407. + * in current context (or on behalf of ksoftirqd).
  22408. + */
  22409. +static void do_current_softirqs(void)
  22410. +{
  22411. + while (current->softirqs_raised) {
  22412. + int i = __ffs(current->softirqs_raised);
  22413. + unsigned int pending, mask = (1U << i);
  22414. +
  22415. + current->softirqs_raised &= ~mask;
  22416. + local_irq_enable();
  22417. +
  22418. + /*
  22419. + * If the lock is contended, we boost the owner to
  22420. + * process the softirq or leave the critical section
  22421. + * now.
  22422. + */
  22423. + lock_softirq(i);
  22424. + local_irq_disable();
  22425. + softirq_set_runner(i);
  22426. + /*
  22427. + * Check with the local_softirq_pending() bits
  22428. + * whether we still need to process this or if someone
  22429. + * else took care of it.
  22430. + */
  22431. + pending = local_softirq_pending();
  22432. + if (pending & mask) {
  22433. + set_softirq_pending(pending & ~mask);
  22434. + do_single_softirq(i);
  22435. + }
  22436. + softirq_clr_runner(i);
  22437. + WARN_ON(current->softirq_nestcnt != 1);
  22438. + local_irq_enable();
  22439. + unlock_softirq(i);
  22440. + local_irq_disable();
  22441. + }
  22442. +}
  22443. +
  22444. +void __local_bh_disable(void)
  22445. +{
  22446. + if (++current->softirq_nestcnt == 1)
  22447. + migrate_disable();
  22448. +}
  22449. +EXPORT_SYMBOL(__local_bh_disable);
  22450. +
  22451. +void __local_bh_enable(void)
  22452. +{
  22453. + if (WARN_ON(current->softirq_nestcnt == 0))
  22454. + return;
  22455. +
  22456. + local_irq_disable();
  22457. + if (current->softirq_nestcnt == 1 && current->softirqs_raised)
  22458. + do_current_softirqs();
  22459. + local_irq_enable();
  22460. +
  22461. + if (--current->softirq_nestcnt == 0)
  22462. + migrate_enable();
  22463. +}
  22464. +EXPORT_SYMBOL(__local_bh_enable);
  22465. +
  22466. +void _local_bh_enable(void)
  22467. +{
  22468. + if (WARN_ON(current->softirq_nestcnt == 0))
  22469. + return;
  22470. + if (--current->softirq_nestcnt == 0)
  22471. + migrate_enable();
  22472. +}
  22473. +EXPORT_SYMBOL(_local_bh_enable);
  22474. +
  22475. +int in_serving_softirq(void)
  22476. +{
  22477. + return current->flags & PF_IN_SOFTIRQ;
  22478. +}
  22479. +EXPORT_SYMBOL(in_serving_softirq);
  22480. +
  22481. +/* Called with preemption disabled */
  22482. +static void run_ksoftirqd(unsigned int cpu)
  22483. +{
  22484. + local_irq_disable();
  22485. + current->softirq_nestcnt++;
  22486. +
  22487. + do_current_softirqs();
  22488. + current->softirq_nestcnt--;
  22489. + local_irq_enable();
  22490. + cond_resched_rcu_qs();
  22491. +}
  22492. +
  22493. +/*
  22494. + * Called from netif_rx_ni(). Preemption enabled, but migration
  22495. + * disabled. So the cpu can't go away under us.
  22496. + */
  22497. +void thread_do_softirq(void)
  22498. +{
  22499. + if (!in_serving_softirq() && current->softirqs_raised) {
  22500. + current->softirq_nestcnt++;
  22501. + do_current_softirqs();
  22502. + current->softirq_nestcnt--;
  22503. + }
  22504. +}
  22505. +
  22506. +static void do_raise_softirq_irqoff(unsigned int nr)
  22507. +{
  22508. + unsigned int mask;
  22509. +
  22510. + mask = 1UL << nr;
  22511. +
  22512. + trace_softirq_raise(nr);
  22513. + or_softirq_pending(mask);
  22514. +
  22515. + /*
  22516. + * If we are not in a hard interrupt and inside a bh disabled
  22517. + * region, we simply raise the flag on current. local_bh_enable()
  22518. + * will make sure that the softirq is executed. Otherwise we
  22519. + * delegate it to ksoftirqd.
  22520. + */
  22521. + if (!in_irq() && current->softirq_nestcnt)
  22522. + current->softirqs_raised |= mask;
  22523. + else if (!__this_cpu_read(ksoftirqd) || !__this_cpu_read(ktimer_softirqd))
  22524. + return;
  22525. +
  22526. + if (mask & TIMER_SOFTIRQS)
  22527. + __this_cpu_read(ktimer_softirqd)->softirqs_raised |= mask;
  22528. + else
  22529. + __this_cpu_read(ksoftirqd)->softirqs_raised |= mask;
  22530. +}
  22531. +
  22532. +static void wakeup_proper_softirq(unsigned int nr)
  22533. +{
  22534. + if ((1UL << nr) & TIMER_SOFTIRQS)
  22535. + wakeup_timer_softirqd();
  22536. + else
  22537. + wakeup_softirqd();
  22538. +}
  22539. +
  22540. +
  22541. +void __raise_softirq_irqoff(unsigned int nr)
  22542. +{
  22543. + do_raise_softirq_irqoff(nr);
  22544. + if (!in_irq() && !current->softirq_nestcnt)
  22545. + wakeup_proper_softirq(nr);
  22546. +}
  22547. +
  22548. +/*
  22549. + * Same as __raise_softirq_irqoff() but will process them in ksoftirqd
  22550. + */
  22551. +void __raise_softirq_irqoff_ksoft(unsigned int nr)
  22552. +{
  22553. + unsigned int mask;
  22554. +
  22555. + if (WARN_ON_ONCE(!__this_cpu_read(ksoftirqd) ||
  22556. + !__this_cpu_read(ktimer_softirqd)))
  22557. + return;
  22558. + mask = 1UL << nr;
  22559. +
  22560. + trace_softirq_raise(nr);
  22561. + or_softirq_pending(mask);
  22562. + if (mask & TIMER_SOFTIRQS)
  22563. + __this_cpu_read(ktimer_softirqd)->softirqs_raised |= mask;
  22564. + else
  22565. + __this_cpu_read(ksoftirqd)->softirqs_raised |= mask;
  22566. + wakeup_proper_softirq(nr);
  22567. +}
  22568. +
  22569. +/*
  22570. + * This function must run with irqs disabled!
  22571. + */
  22572. +void raise_softirq_irqoff(unsigned int nr)
  22573. +{
  22574. + do_raise_softirq_irqoff(nr);
  22575. +
  22576. + /*
  22577. + * If we're in a hard interrupt we let the irq return code deal
  22578. + * with the wakeup of ksoftirqd.
  22579. + */
  22580. + if (in_irq())
  22581. + return;
  22582. + /*
  22583. + * If we are in thread context but outside of a bh disabled
  22584. + * region, we need to wake ksoftirqd as well.
  22585. + *
  22586. + * CHECKME: Some of the places which do that could be wrapped
  22587. + * into local_bh_disable/enable pairs. Though it's unclear
  22588. + * whether this is worth the effort. To find those places just
  22589. + * raise a WARN() if the condition is met.
  22590. + */
  22591. + if (!current->softirq_nestcnt)
  22592. + wakeup_proper_softirq(nr);
  22593. +}
  22594. +
  22595. +static inline int ksoftirqd_softirq_pending(void)
  22596. +{
  22597. + return current->softirqs_raised;
  22598. +}
  22599. +
  22600. +static inline void local_bh_disable_nort(void) { }
  22601. +static inline void _local_bh_enable_nort(void) { }
  22602. +
  22603. +static inline void ksoftirqd_set_sched_params(unsigned int cpu)
  22604. +{
  22605. + /* Take over all but timer pending softirqs when starting */
  22606. + local_irq_disable();
  22607. + current->softirqs_raised = local_softirq_pending() & ~TIMER_SOFTIRQS;
  22608. + local_irq_enable();
  22609. +}
  22610. +
  22611. +static inline void ktimer_softirqd_set_sched_params(unsigned int cpu)
  22612. +{
  22613. + struct sched_param param = { .sched_priority = 1 };
  22614. +
  22615. + sched_setscheduler(current, SCHED_FIFO, &param);
  22616. +
  22617. + /* Take over timer pending softirqs when starting */
  22618. + local_irq_disable();
  22619. + current->softirqs_raised = local_softirq_pending() & TIMER_SOFTIRQS;
  22620. + local_irq_enable();
  22621. +}
  22622. +
  22623. +static inline void ktimer_softirqd_clr_sched_params(unsigned int cpu,
  22624. + bool online)
  22625. +{
  22626. + struct sched_param param = { .sched_priority = 0 };
  22627. +
  22628. + sched_setscheduler(current, SCHED_NORMAL, &param);
  22629. +}
  22630. +
  22631. +static int ktimer_softirqd_should_run(unsigned int cpu)
  22632. +{
  22633. + return current->softirqs_raised;
  22634. +}
  22635. +
  22636. +#endif /* PREEMPT_RT_FULL */
  22637. +/*
  22638. * Enter an interrupt context.
  22639. */
  22640. void irq_enter(void)
  22641. @@ -330,9 +774,9 @@
  22642. * Prevent raise_softirq from needlessly waking up ksoftirqd
  22643. * here, as softirq will be serviced on return from interrupt.
  22644. */
  22645. - local_bh_disable();
  22646. + local_bh_disable_nort();
  22647. tick_irq_enter();
  22648. - _local_bh_enable();
  22649. + _local_bh_enable_nort();
  22650. }
  22651. __irq_enter();
  22652. @@ -340,6 +784,7 @@
  22653. static inline void invoke_softirq(void)
  22654. {
  22655. +#ifndef CONFIG_PREEMPT_RT_FULL
  22656. if (!force_irqthreads) {
  22657. #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
  22658. /*
  22659. @@ -359,6 +804,18 @@
  22660. } else {
  22661. wakeup_softirqd();
  22662. }
  22663. +#else /* PREEMPT_RT_FULL */
  22664. + unsigned long flags;
  22665. +
  22666. + local_irq_save(flags);
  22667. + if (__this_cpu_read(ksoftirqd) &&
  22668. + __this_cpu_read(ksoftirqd)->softirqs_raised)
  22669. + wakeup_softirqd();
  22670. + if (__this_cpu_read(ktimer_softirqd) &&
  22671. + __this_cpu_read(ktimer_softirqd)->softirqs_raised)
  22672. + wakeup_timer_softirqd();
  22673. + local_irq_restore(flags);
  22674. +#endif
  22675. }
  22676. static inline void tick_irq_exit(void)
  22677. @@ -395,26 +852,6 @@
  22678. trace_hardirq_exit(); /* must be last! */
  22679. }
  22680. -/*
  22681. - * This function must run with irqs disabled!
  22682. - */
  22683. -inline void raise_softirq_irqoff(unsigned int nr)
  22684. -{
  22685. - __raise_softirq_irqoff(nr);
  22686. -
  22687. - /*
  22688. - * If we're in an interrupt or softirq, we're done
  22689. - * (this also catches softirq-disabled code). We will
  22690. - * actually run the softirq once we return from
  22691. - * the irq or softirq.
  22692. - *
  22693. - * Otherwise we wake up ksoftirqd to make sure we
  22694. - * schedule the softirq soon.
  22695. - */
  22696. - if (!in_interrupt())
  22697. - wakeup_softirqd();
  22698. -}
  22699. -
  22700. void raise_softirq(unsigned int nr)
  22701. {
  22702. unsigned long flags;
  22703. @@ -424,12 +861,6 @@
  22704. local_irq_restore(flags);
  22705. }
  22706. -void __raise_softirq_irqoff(unsigned int nr)
  22707. -{
  22708. - trace_softirq_raise(nr);
  22709. - or_softirq_pending(1UL << nr);
  22710. -}
  22711. -
  22712. void open_softirq(int nr, void (*action)(struct softirq_action *))
  22713. {
  22714. softirq_vec[nr].action = action;
  22715. @@ -446,15 +877,45 @@
  22716. static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
  22717. static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
  22718. +static void inline
  22719. +__tasklet_common_schedule(struct tasklet_struct *t, struct tasklet_head *head, unsigned int nr)
  22720. +{
  22721. + if (tasklet_trylock(t)) {
  22722. +again:
  22723. + /* We may have been preempted before tasklet_trylock
  22724. + * and __tasklet_action may have already run.
  22725. + * So double check the sched bit while the tasklet
  22726. + * is locked before adding it to the list.
  22727. + */
  22728. + if (test_bit(TASKLET_STATE_SCHED, &t->state)) {
  22729. + t->next = NULL;
  22730. + *head->tail = t;
  22731. + head->tail = &(t->next);
  22732. + raise_softirq_irqoff(nr);
  22733. + tasklet_unlock(t);
  22734. + } else {
  22735. + /* This is subtle. If we hit the corner case above,
  22736. + * it is possible that we get preempted right here,
  22737. + * and another task has successfully called
  22738. + * tasklet_schedule(), then this function, and
  22739. + * failed on the trylock. Thus we must be sure
  22740. + * before releasing the tasklet lock, that the
  22741. + * SCHED_BIT is clear. Otherwise the tasklet
  22742. + * may get its SCHED_BIT set, but not added to the
  22743. + * list
  22744. + */
  22745. + if (!tasklet_tryunlock(t))
  22746. + goto again;
  22747. + }
  22748. + }
  22749. +}
  22750. +
  22751. void __tasklet_schedule(struct tasklet_struct *t)
  22752. {
  22753. unsigned long flags;
  22754. local_irq_save(flags);
  22755. - t->next = NULL;
  22756. - *__this_cpu_read(tasklet_vec.tail) = t;
  22757. - __this_cpu_write(tasklet_vec.tail, &(t->next));
  22758. - raise_softirq_irqoff(TASKLET_SOFTIRQ);
  22759. + __tasklet_common_schedule(t, this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ);
  22760. local_irq_restore(flags);
  22761. }
  22762. EXPORT_SYMBOL(__tasklet_schedule);
  22763. @@ -464,10 +925,7 @@
  22764. unsigned long flags;
  22765. local_irq_save(flags);
  22766. - t->next = NULL;
  22767. - *__this_cpu_read(tasklet_hi_vec.tail) = t;
  22768. - __this_cpu_write(tasklet_hi_vec.tail, &(t->next));
  22769. - raise_softirq_irqoff(HI_SOFTIRQ);
  22770. + __tasklet_common_schedule(t, this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ);
  22771. local_irq_restore(flags);
  22772. }
  22773. EXPORT_SYMBOL(__tasklet_hi_schedule);
  22774. @@ -476,82 +934,122 @@
  22775. {
  22776. BUG_ON(!irqs_disabled());
  22777. - t->next = __this_cpu_read(tasklet_hi_vec.head);
  22778. - __this_cpu_write(tasklet_hi_vec.head, t);
  22779. - __raise_softirq_irqoff(HI_SOFTIRQ);
  22780. + __tasklet_hi_schedule(t);
  22781. }
  22782. EXPORT_SYMBOL(__tasklet_hi_schedule_first);
  22783. -static void tasklet_action(struct softirq_action *a)
  22784. +void tasklet_enable(struct tasklet_struct *t)
  22785. {
  22786. - struct tasklet_struct *list;
  22787. + if (!atomic_dec_and_test(&t->count))
  22788. + return;
  22789. + if (test_and_clear_bit(TASKLET_STATE_PENDING, &t->state))
  22790. + tasklet_schedule(t);
  22791. +}
  22792. +EXPORT_SYMBOL(tasklet_enable);
  22793. - local_irq_disable();
  22794. - list = __this_cpu_read(tasklet_vec.head);
  22795. - __this_cpu_write(tasklet_vec.head, NULL);
  22796. - __this_cpu_write(tasklet_vec.tail, this_cpu_ptr(&tasklet_vec.head));
  22797. - local_irq_enable();
  22798. +static void __tasklet_action(struct softirq_action *a,
  22799. + struct tasklet_struct *list)
  22800. +{
  22801. + int loops = 1000000;
  22802. while (list) {
  22803. struct tasklet_struct *t = list;
  22804. list = list->next;
  22805. - if (tasklet_trylock(t)) {
  22806. - if (!atomic_read(&t->count)) {
  22807. - if (!test_and_clear_bit(TASKLET_STATE_SCHED,
  22808. - &t->state))
  22809. - BUG();
  22810. - t->func(t->data);
  22811. - tasklet_unlock(t);
  22812. - continue;
  22813. - }
  22814. - tasklet_unlock(t);
  22815. + /*
  22816. + * Should always succeed - after a tasklet got on the
  22817. + * list (after getting the SCHED bit set from 0 to 1),
  22818. + * nothing but the tasklet softirq it got queued to can
  22819. + * lock it:
  22820. + */
  22821. + if (!tasklet_trylock(t)) {
  22822. + WARN_ON(1);
  22823. + continue;
  22824. }
  22825. - local_irq_disable();
  22826. t->next = NULL;
  22827. - *__this_cpu_read(tasklet_vec.tail) = t;
  22828. - __this_cpu_write(tasklet_vec.tail, &(t->next));
  22829. - __raise_softirq_irqoff(TASKLET_SOFTIRQ);
  22830. - local_irq_enable();
  22831. +
  22832. + /*
  22833. + * If we cannot handle the tasklet because it's disabled,
  22834. + * mark it as pending. tasklet_enable() will later
  22835. + * re-schedule the tasklet.
  22836. + */
  22837. + if (unlikely(atomic_read(&t->count))) {
  22838. +out_disabled:
  22839. + /* implicit unlock: */
  22840. + wmb();
  22841. + t->state = TASKLET_STATEF_PENDING;
  22842. + continue;
  22843. + }
  22844. +
  22845. + /*
  22846. + * After this point on the tasklet might be rescheduled
  22847. + * on another CPU, but it can only be added to another
  22848. + * CPU's tasklet list if we unlock the tasklet (which we
  22849. + * don't do yet).
  22850. + */
  22851. + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
  22852. + WARN_ON(1);
  22853. +
  22854. +again:
  22855. + t->func(t->data);
  22856. +
  22857. + /*
  22858. + * Try to unlock the tasklet. We must use cmpxchg, because
  22859. + * another CPU might have scheduled or disabled the tasklet.
  22860. + * We only allow the STATE_RUN -> 0 transition here.
  22861. + */
  22862. + while (!tasklet_tryunlock(t)) {
  22863. + /*
  22864. + * If it got disabled meanwhile, bail out:
  22865. + */
  22866. + if (atomic_read(&t->count))
  22867. + goto out_disabled;
  22868. + /*
  22869. + * If it got scheduled meanwhile, re-execute
  22870. + * the tasklet function:
  22871. + */
  22872. + if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
  22873. + goto again;
  22874. + if (!--loops) {
  22875. + printk("hm, tasklet state: %08lx\n", t->state);
  22876. + WARN_ON(1);
  22877. + tasklet_unlock(t);
  22878. + break;
  22879. + }
  22880. + }
  22881. }
  22882. }
  22883. +static void tasklet_action(struct softirq_action *a)
  22884. +{
  22885. + struct tasklet_struct *list;
  22886. +
  22887. + local_irq_disable();
  22888. +
  22889. + list = __this_cpu_read(tasklet_vec.head);
  22890. + __this_cpu_write(tasklet_vec.head, NULL);
  22891. + __this_cpu_write(tasklet_vec.tail, this_cpu_ptr(&tasklet_vec.head));
  22892. +
  22893. + local_irq_enable();
  22894. +
  22895. + __tasklet_action(a, list);
  22896. +}
  22897. +
  22898. static void tasklet_hi_action(struct softirq_action *a)
  22899. {
  22900. struct tasklet_struct *list;
  22901. local_irq_disable();
  22902. +
  22903. list = __this_cpu_read(tasklet_hi_vec.head);
  22904. __this_cpu_write(tasklet_hi_vec.head, NULL);
  22905. __this_cpu_write(tasklet_hi_vec.tail, this_cpu_ptr(&tasklet_hi_vec.head));
  22906. - local_irq_enable();
  22907. - while (list) {
  22908. - struct tasklet_struct *t = list;
  22909. -
  22910. - list = list->next;
  22911. -
  22912. - if (tasklet_trylock(t)) {
  22913. - if (!atomic_read(&t->count)) {
  22914. - if (!test_and_clear_bit(TASKLET_STATE_SCHED,
  22915. - &t->state))
  22916. - BUG();
  22917. - t->func(t->data);
  22918. - tasklet_unlock(t);
  22919. - continue;
  22920. - }
  22921. - tasklet_unlock(t);
  22922. - }
  22923. + local_irq_enable();
  22924. - local_irq_disable();
  22925. - t->next = NULL;
  22926. - *__this_cpu_read(tasklet_hi_vec.tail) = t;
  22927. - __this_cpu_write(tasklet_hi_vec.tail, &(t->next));
  22928. - __raise_softirq_irqoff(HI_SOFTIRQ);
  22929. - local_irq_enable();
  22930. - }
  22931. + __tasklet_action(a, list);
  22932. }
  22933. void tasklet_init(struct tasklet_struct *t,
  22934. @@ -572,7 +1070,7 @@
  22935. while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
  22936. do {
  22937. - yield();
  22938. + msleep(1);
  22939. } while (test_bit(TASKLET_STATE_SCHED, &t->state));
  22940. }
  22941. tasklet_unlock_wait(t);
  22942. @@ -646,25 +1144,26 @@
  22943. open_softirq(HI_SOFTIRQ, tasklet_hi_action);
  22944. }
  22945. -static int ksoftirqd_should_run(unsigned int cpu)
  22946. -{
  22947. - return local_softirq_pending();
  22948. -}
  22949. -
  22950. -static void run_ksoftirqd(unsigned int cpu)
  22951. +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
  22952. +void tasklet_unlock_wait(struct tasklet_struct *t)
  22953. {
  22954. - local_irq_disable();
  22955. - if (local_softirq_pending()) {
  22956. + while (test_bit(TASKLET_STATE_RUN, &(t)->state)) {
  22957. /*
  22958. - * We can safely run softirq on inline stack, as we are not deep
  22959. - * in the task stack here.
  22960. + * Hack for now to avoid this busy-loop:
  22961. */
  22962. - __do_softirq();
  22963. - local_irq_enable();
  22964. - cond_resched_rcu_qs();
  22965. - return;
  22966. +#ifdef CONFIG_PREEMPT_RT_FULL
  22967. + msleep(1);
  22968. +#else
  22969. + barrier();
  22970. +#endif
  22971. }
  22972. - local_irq_enable();
  22973. +}
  22974. +EXPORT_SYMBOL(tasklet_unlock_wait);
  22975. +#endif
  22976. +
  22977. +static int ksoftirqd_should_run(unsigned int cpu)
  22978. +{
  22979. + return ksoftirqd_softirq_pending();
  22980. }
  22981. #ifdef CONFIG_HOTPLUG_CPU
  22982. @@ -746,16 +1245,31 @@
  22983. static struct smp_hotplug_thread softirq_threads = {
  22984. .store = &ksoftirqd,
  22985. + .setup = ksoftirqd_set_sched_params,
  22986. .thread_should_run = ksoftirqd_should_run,
  22987. .thread_fn = run_ksoftirqd,
  22988. .thread_comm = "ksoftirqd/%u",
  22989. };
  22990. +#ifdef CONFIG_PREEMPT_RT_FULL
  22991. +static struct smp_hotplug_thread softirq_timer_threads = {
  22992. + .store = &ktimer_softirqd,
  22993. + .setup = ktimer_softirqd_set_sched_params,
  22994. + .cleanup = ktimer_softirqd_clr_sched_params,
  22995. + .thread_should_run = ktimer_softirqd_should_run,
  22996. + .thread_fn = run_ksoftirqd,
  22997. + .thread_comm = "ktimersoftd/%u",
  22998. +};
  22999. +#endif
  23000. +
  23001. static __init int spawn_ksoftirqd(void)
  23002. {
  23003. register_cpu_notifier(&cpu_nfb);
  23004. BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
  23005. +#ifdef CONFIG_PREEMPT_RT_FULL
  23006. + BUG_ON(smpboot_register_percpu_thread(&softirq_timer_threads));
  23007. +#endif
  23008. return 0;
  23009. }
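
On PREEMPT_RT_FULL the softirq.c hunks above split softirq handling across two per-CPU threads: ksoftirqd for most vectors and ktimersoftd (SCHED_FIFO priority 1) for the timer vectors, with do_raise_softirq_irqoff() steering each raised vector into the matching thread's softirqs_raised mask. The userspace sketch below models only that routing decision with two pthread workers and per-worker pending bitmasks; the bit values, names and helpers are invented for illustration and are not kernel API.

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

#define TIMER_BITS ((1u << 1) | (1u << 2))	/* stand-in for TIMER_SOFTIRQS */

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  kick = PTHREAD_COND_INITIALIZER;
static unsigned int pending_normal, pending_timer;
static int done;

static void *worker(void *arg)
{
	unsigned int *my_pending = arg;

	pthread_mutex_lock(&lock);
	while (!done) {
		while (*my_pending) {
			unsigned int vec = (unsigned int)__builtin_ctz(*my_pending);

			*my_pending &= *my_pending - 1;	/* clear lowest set bit */
			pthread_mutex_unlock(&lock);
			printf("%s handles vec %u\n",
			       my_pending == &pending_timer ? "ktimersoftd" : "ksoftirqd",
			       vec);
			pthread_mutex_lock(&lock);
		}
		pthread_cond_wait(&kick, &lock);
	}
	pthread_mutex_unlock(&lock);
	return NULL;
}

/* Mirrors the routing idea in do_raise_softirq_irqoff(): timer vectors go to
 * the timer worker, everything else to the normal worker. */
static void raise_vec(unsigned int nr)
{
	pthread_mutex_lock(&lock);
	if ((1u << nr) & TIMER_BITS)
		pending_timer |= 1u << nr;
	else
		pending_normal |= 1u << nr;
	pthread_cond_broadcast(&kick);
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	pthread_t t1, t2;

	pthread_create(&t1, NULL, worker, &pending_normal);
	pthread_create(&t2, NULL, worker, &pending_timer);

	raise_vec(0);	/* non-timer work: handled by the "ksoftirqd" worker */
	raise_vec(1);	/* timer work: handled by the "ktimersoftd" worker */
	sleep(1);

	pthread_mutex_lock(&lock);
	done = 1;
	pthread_cond_broadcast(&kick);
	pthread_mutex_unlock(&lock);

	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	return 0;
}
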
  23010. diff -Nur linux-4.4.62.orig/kernel/stop_machine.c linux-4.4.62/kernel/stop_machine.c
  23011. --- linux-4.4.62.orig/kernel/stop_machine.c 2017-04-18 07:15:37.000000000 +0200
  23012. +++ linux-4.4.62/kernel/stop_machine.c 2017-04-18 17:38:08.222650330 +0200
  23013. @@ -37,7 +37,7 @@
  23014. struct cpu_stopper {
  23015. struct task_struct *thread;
  23016. - spinlock_t lock;
  23017. + raw_spinlock_t lock;
  23018. bool enabled; /* is this stopper enabled? */
  23019. struct list_head works; /* list of pending works */
  23020. @@ -86,12 +86,12 @@
  23021. struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
  23022. unsigned long flags;
  23023. - spin_lock_irqsave(&stopper->lock, flags);
  23024. + raw_spin_lock_irqsave(&stopper->lock, flags);
  23025. if (stopper->enabled)
  23026. __cpu_stop_queue_work(stopper, work);
  23027. else
  23028. cpu_stop_signal_done(work->done, false);
  23029. - spin_unlock_irqrestore(&stopper->lock, flags);
  23030. + raw_spin_unlock_irqrestore(&stopper->lock, flags);
  23031. }
  23032. /**
  23033. @@ -224,8 +224,8 @@
  23034. int err;
  23035. lg_double_lock(&stop_cpus_lock, cpu1, cpu2);
  23036. - spin_lock_irq(&stopper1->lock);
  23037. - spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING);
  23038. + raw_spin_lock_irq(&stopper1->lock);
  23039. + raw_spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING);
  23040. err = -ENOENT;
  23041. if (!stopper1->enabled || !stopper2->enabled)
  23042. @@ -235,8 +235,8 @@
  23043. __cpu_stop_queue_work(stopper1, work1);
  23044. __cpu_stop_queue_work(stopper2, work2);
  23045. unlock:
  23046. - spin_unlock(&stopper2->lock);
  23047. - spin_unlock_irq(&stopper1->lock);
  23048. + raw_spin_unlock(&stopper2->lock);
  23049. + raw_spin_unlock_irq(&stopper1->lock);
  23050. lg_double_unlock(&stop_cpus_lock, cpu1, cpu2);
  23051. return err;
  23052. @@ -258,7 +258,7 @@
  23053. struct cpu_stop_work work1, work2;
  23054. struct multi_stop_data msdata;
  23055. - preempt_disable();
  23056. + preempt_disable_nort();
  23057. msdata = (struct multi_stop_data){
  23058. .fn = fn,
  23059. .data = arg,
  23060. @@ -278,11 +278,11 @@
  23061. if (cpu1 > cpu2)
  23062. swap(cpu1, cpu2);
  23063. if (cpu_stop_queue_two_works(cpu1, &work1, cpu2, &work2)) {
  23064. - preempt_enable();
  23065. + preempt_enable_nort();
  23066. return -ENOENT;
  23067. }
  23068. - preempt_enable();
  23069. + preempt_enable_nort();
  23070. wait_for_completion(&done.completion);
  23071. @@ -315,17 +315,20 @@
  23072. static void queue_stop_cpus_work(const struct cpumask *cpumask,
  23073. cpu_stop_fn_t fn, void *arg,
  23074. - struct cpu_stop_done *done)
  23075. + struct cpu_stop_done *done, bool inactive)
  23076. {
  23077. struct cpu_stop_work *work;
  23078. unsigned int cpu;
  23079. /*
  23080. - * Disable preemption while queueing to avoid getting
  23081. - * preempted by a stopper which might wait for other stoppers
  23082. - * to enter @fn which can lead to deadlock.
  23083. + * Make sure that all work is queued on all cpus before
  23084. + * any of the cpus can execute it.
  23085. */
  23086. - lg_global_lock(&stop_cpus_lock);
  23087. + if (!inactive)
  23088. + lg_global_lock(&stop_cpus_lock);
  23089. + else
  23090. + lg_global_trylock_relax(&stop_cpus_lock);
  23091. +
  23092. for_each_cpu(cpu, cpumask) {
  23093. work = &per_cpu(cpu_stopper.stop_work, cpu);
  23094. work->fn = fn;
  23095. @@ -342,7 +345,7 @@
  23096. struct cpu_stop_done done;
  23097. cpu_stop_init_done(&done, cpumask_weight(cpumask));
  23098. - queue_stop_cpus_work(cpumask, fn, arg, &done);
  23099. + queue_stop_cpus_work(cpumask, fn, arg, &done, false);
  23100. wait_for_completion(&done.completion);
  23101. return done.executed ? done.ret : -ENOENT;
  23102. }
  23103. @@ -422,9 +425,9 @@
  23104. unsigned long flags;
  23105. int run;
  23106. - spin_lock_irqsave(&stopper->lock, flags);
  23107. + raw_spin_lock_irqsave(&stopper->lock, flags);
  23108. run = !list_empty(&stopper->works);
  23109. - spin_unlock_irqrestore(&stopper->lock, flags);
  23110. + raw_spin_unlock_irqrestore(&stopper->lock, flags);
  23111. return run;
  23112. }
  23113. @@ -436,13 +439,13 @@
  23114. repeat:
  23115. work = NULL;
  23116. - spin_lock_irq(&stopper->lock);
  23117. + raw_spin_lock_irq(&stopper->lock);
  23118. if (!list_empty(&stopper->works)) {
  23119. work = list_first_entry(&stopper->works,
  23120. struct cpu_stop_work, list);
  23121. list_del_init(&work->list);
  23122. }
  23123. - spin_unlock_irq(&stopper->lock);
  23124. + raw_spin_unlock_irq(&stopper->lock);
  23125. if (work) {
  23126. cpu_stop_fn_t fn = work->fn;
  23127. @@ -450,6 +453,16 @@
  23128. struct cpu_stop_done *done = work->done;
  23129. char ksym_buf[KSYM_NAME_LEN] __maybe_unused;
  23130. + /*
  23131. + * Wait until the stopper finished scheduling on all
  23132. + * cpus
  23133. + */
  23134. + lg_global_lock(&stop_cpus_lock);
  23135. + /*
  23136. + * Let other cpu threads continue as well
  23137. + */
  23138. + lg_global_unlock(&stop_cpus_lock);
  23139. +
  23140. /* cpu stop callbacks are not allowed to sleep */
  23141. preempt_disable();
  23142. @@ -520,10 +533,12 @@
  23143. for_each_possible_cpu(cpu) {
  23144. struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
  23145. - spin_lock_init(&stopper->lock);
  23146. + raw_spin_lock_init(&stopper->lock);
  23147. INIT_LIST_HEAD(&stopper->works);
  23148. }
  23149. + lg_lock_init(&stop_cpus_lock, "stop_cpus_lock");
  23150. +
  23151. BUG_ON(smpboot_register_percpu_thread(&cpu_stop_threads));
  23152. stop_machine_unpark(raw_smp_processor_id());
  23153. stop_machine_initialized = true;
  23154. @@ -620,7 +635,7 @@
  23155. set_state(&msdata, MULTI_STOP_PREPARE);
  23156. cpu_stop_init_done(&done, num_active_cpus());
  23157. queue_stop_cpus_work(cpu_active_mask, multi_cpu_stop, &msdata,
  23158. - &done);
  23159. + &done, true);
  23160. ret = multi_cpu_stop(&msdata);
  23161. /* Busy wait for completion. */
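
The stop_machine.c hunks convert the cpu_stopper lock from spinlock_t to raw_spinlock_t because on RT a spinlock_t becomes a sleeping lock, which must not be taken in the irq-disabled stopper queueing paths. As a rough userspace analogy only (pthread primitives standing in for the kernel lock types), the distinction is between a lock whose waiter busy-spins and one whose waiter may block:

#include <pthread.h>
#include <stdio.h>

int main(void)
{
	/* Analogue of raw_spinlock_t: contended waiters busy-spin, never sleep. */
	pthread_spinlock_t raw;
	/* Analogue of an RT spinlock_t: contended waiters may block (sleep). */
	pthread_mutex_t sleeping = PTHREAD_MUTEX_INITIALIZER;

	pthread_spin_init(&raw, PTHREAD_PROCESS_PRIVATE);

	pthread_spin_lock(&raw);	/* usable where blocking is forbidden */
	pthread_spin_unlock(&raw);

	pthread_mutex_lock(&sleeping);	/* fine in thread context only */
	pthread_mutex_unlock(&sleeping);

	pthread_spin_destroy(&raw);
	puts("exercised both lock flavours");
	return 0;
}
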
  23162. diff -Nur linux-4.4.62.orig/kernel/time/hrtimer.c linux-4.4.62/kernel/time/hrtimer.c
  23163. --- linux-4.4.62.orig/kernel/time/hrtimer.c 2017-04-18 07:15:37.000000000 +0200
  23164. +++ linux-4.4.62/kernel/time/hrtimer.c 2017-04-18 17:38:08.222650330 +0200
  23165. @@ -48,11 +48,13 @@
  23166. #include <linux/sched/rt.h>
  23167. #include <linux/sched/deadline.h>
  23168. #include <linux/timer.h>
  23169. +#include <linux/kthread.h>
  23170. #include <linux/freezer.h>
  23171. #include <asm/uaccess.h>
  23172. #include <trace/events/timer.h>
  23173. +#include <trace/events/hist.h>
  23174. #include "tick-internal.h"
  23175. @@ -717,6 +719,44 @@
  23176. static DECLARE_WORK(hrtimer_work, clock_was_set_work);
  23177. +#ifdef CONFIG_PREEMPT_RT_FULL
  23178. +/*
  23179. + * RT can not call schedule_work from real interrupt context.
  23180. + * Need to make a thread to do the real work.
  23181. + */
  23182. +static struct task_struct *clock_set_delay_thread;
  23183. +static bool do_clock_set_delay;
  23184. +
  23185. +static int run_clock_set_delay(void *ignore)
  23186. +{
  23187. + while (!kthread_should_stop()) {
  23188. + set_current_state(TASK_INTERRUPTIBLE);
  23189. + if (do_clock_set_delay) {
  23190. + do_clock_set_delay = false;
  23191. + schedule_work(&hrtimer_work);
  23192. + }
  23193. + schedule();
  23194. + }
  23195. + __set_current_state(TASK_RUNNING);
  23196. + return 0;
  23197. +}
  23198. +
  23199. +void clock_was_set_delayed(void)
  23200. +{
  23201. + do_clock_set_delay = true;
  23202. + /* Make visible before waking up process */
  23203. + smp_wmb();
  23204. + wake_up_process(clock_set_delay_thread);
  23205. +}
  23206. +
  23207. +static __init int create_clock_set_delay_thread(void)
  23208. +{
  23209. + clock_set_delay_thread = kthread_run(run_clock_set_delay, NULL, "kclksetdelayd");
  23210. + BUG_ON(!clock_set_delay_thread);
  23211. + return 0;
  23212. +}
  23213. +early_initcall(create_clock_set_delay_thread);
  23214. +#else /* PREEMPT_RT_FULL */
  23215. /*
  23216. * Called from timekeeping and resume code to reprogram the hrtimer
  23217. * interrupt device on all cpus.
  23218. @@ -725,6 +765,7 @@
  23219. {
  23220. schedule_work(&hrtimer_work);
  23221. }
  23222. +#endif
  23223. #else
  23224. @@ -734,11 +775,8 @@
  23225. static inline void hrtimer_switch_to_hres(void) { }
  23226. static inline void
  23227. hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { }
  23228. -static inline int hrtimer_reprogram(struct hrtimer *timer,
  23229. - struct hrtimer_clock_base *base)
  23230. -{
  23231. - return 0;
  23232. -}
  23233. +static inline void hrtimer_reprogram(struct hrtimer *timer,
  23234. + struct hrtimer_clock_base *base) { }
  23235. static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
  23236. static inline void retrigger_next_event(void *arg) { }
  23237. @@ -870,6 +908,32 @@
  23238. }
  23239. EXPORT_SYMBOL_GPL(hrtimer_forward);
  23240. +#ifdef CONFIG_PREEMPT_RT_BASE
  23241. +# define wake_up_timer_waiters(b) wake_up(&(b)->wait)
  23242. +
  23243. +/**
  23244. + * hrtimer_wait_for_timer - Wait for a running timer
  23245. + *
  23246. + * @timer: timer to wait for
  23247. + *
  23248. + * The function waits in case the timer's callback function is
  23249. + * currently executed on the waitqueue of the timer base. The
  23250. + * waitqueue is woken up after the timer callback function has
  23251. + * finished execution.
  23252. + */
  23253. +void hrtimer_wait_for_timer(const struct hrtimer *timer)
  23254. +{
  23255. + struct hrtimer_clock_base *base = timer->base;
  23256. +
  23257. + if (base && base->cpu_base && !timer->irqsafe)
  23258. + wait_event(base->cpu_base->wait,
  23259. + !(hrtimer_callback_running(timer)));
  23260. +}
  23261. +
  23262. +#else
  23263. +# define wake_up_timer_waiters(b) do { } while (0)
  23264. +#endif
  23265. +
  23266. /*
  23267. * enqueue_hrtimer - internal function to (re)start a timer
  23268. *
  23269. @@ -911,6 +975,11 @@
  23270. if (!(state & HRTIMER_STATE_ENQUEUED))
  23271. return;
  23272. + if (unlikely(!list_empty(&timer->cb_entry))) {
  23273. + list_del_init(&timer->cb_entry);
  23274. + return;
  23275. + }
  23276. +
  23277. if (!timerqueue_del(&base->active, &timer->node))
  23278. cpu_base->active_bases &= ~(1 << base->index);
  23279. @@ -1006,7 +1075,16 @@
  23280. new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
  23281. timer_stats_hrtimer_set_start_info(timer);
  23282. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  23283. + {
  23284. + ktime_t now = new_base->get_time();
  23285. + if (ktime_to_ns(tim) < ktime_to_ns(now))
  23286. + timer->praecox = now;
  23287. + else
  23288. + timer->praecox = ktime_set(0, 0);
  23289. + }
  23290. +#endif
  23291. leftmost = enqueue_hrtimer(timer, new_base);
  23292. if (!leftmost)
  23293. goto unlock;
  23294. @@ -1078,7 +1156,7 @@
  23295. if (ret >= 0)
  23296. return ret;
  23297. - cpu_relax();
  23298. + hrtimer_wait_for_timer(timer);
  23299. }
  23300. }
  23301. EXPORT_SYMBOL_GPL(hrtimer_cancel);
  23302. @@ -1142,6 +1220,7 @@
  23303. base = hrtimer_clockid_to_base(clock_id);
  23304. timer->base = &cpu_base->clock_base[base];
  23305. + INIT_LIST_HEAD(&timer->cb_entry);
  23306. timerqueue_init(&timer->node);
  23307. #ifdef CONFIG_TIMER_STATS
  23308. @@ -1182,6 +1261,7 @@
  23309. seq = raw_read_seqcount_begin(&cpu_base->seq);
  23310. if (timer->state != HRTIMER_STATE_INACTIVE ||
  23311. + cpu_base->running_soft == timer ||
  23312. cpu_base->running == timer)
  23313. return true;
  23314. @@ -1280,10 +1360,112 @@
  23315. cpu_base->running = NULL;
  23316. }
  23317. +#ifdef CONFIG_PREEMPT_RT_BASE
  23318. +static void hrtimer_rt_reprogram(int restart, struct hrtimer *timer,
  23319. + struct hrtimer_clock_base *base)
  23320. +{
  23321. + int leftmost;
  23322. +
  23323. + if (restart != HRTIMER_NORESTART &&
  23324. + !(timer->state & HRTIMER_STATE_ENQUEUED)) {
  23325. +
  23326. + leftmost = enqueue_hrtimer(timer, base);
  23327. + if (!leftmost)
  23328. + return;
  23329. +#ifdef CONFIG_HIGH_RES_TIMERS
  23330. + if (!hrtimer_is_hres_active(timer)) {
  23331. + /*
  23332. + * Kick to reschedule the next tick to handle the new timer
  23333. + * on dynticks target.
  23334. + */
  23335. + if (base->cpu_base->nohz_active)
  23336. + wake_up_nohz_cpu(base->cpu_base->cpu);
  23337. + } else {
  23338. +
  23339. + hrtimer_reprogram(timer, base);
  23340. + }
  23341. +#endif
  23342. + }
  23343. +}
  23344. +
  23345. +/*
  23346. + * The changes in mainline which removed the callback modes from
  23347. + * hrtimer are not yet working with -rt. The non wakeup_process()
  23348. + * based callbacks which involve sleeping locks need to be treated
  23349. + * seperately.
  23350. + */
  23351. +static void hrtimer_rt_run_pending(void)
  23352. +{
  23353. + enum hrtimer_restart (*fn)(struct hrtimer *);
  23354. + struct hrtimer_cpu_base *cpu_base;
  23355. + struct hrtimer_clock_base *base;
  23356. + struct hrtimer *timer;
  23357. + int index, restart;
  23358. +
  23359. + local_irq_disable();
  23360. + cpu_base = &per_cpu(hrtimer_bases, smp_processor_id());
  23361. +
  23362. + raw_spin_lock(&cpu_base->lock);
  23363. +
  23364. + for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) {
  23365. + base = &cpu_base->clock_base[index];
  23366. +
  23367. + while (!list_empty(&base->expired)) {
  23368. + timer = list_first_entry(&base->expired,
  23369. + struct hrtimer, cb_entry);
  23370. +
  23371. + /*
  23372. + * Same as the above __run_hrtimer function
  23373. + * except that we run with interrupts enabled.
  23374. + */
  23375. + debug_deactivate(timer);
  23376. + cpu_base->running_soft = timer;
  23377. + raw_write_seqcount_barrier(&cpu_base->seq);
  23378. +
  23379. + __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0);
  23380. + timer_stats_account_hrtimer(timer);
  23381. + fn = timer->function;
  23382. +
  23383. + raw_spin_unlock_irq(&cpu_base->lock);
  23384. + restart = fn(timer);
  23385. + raw_spin_lock_irq(&cpu_base->lock);
  23386. +
  23387. + hrtimer_rt_reprogram(restart, timer, base);
  23388. + raw_write_seqcount_barrier(&cpu_base->seq);
  23389. +
  23390. + WARN_ON_ONCE(cpu_base->running_soft != timer);
  23391. + cpu_base->running_soft = NULL;
  23392. + }
  23393. + }
  23394. +
  23395. + raw_spin_unlock_irq(&cpu_base->lock);
  23396. +
  23397. + wake_up_timer_waiters(cpu_base);
  23398. +}
  23399. +
  23400. +static int hrtimer_rt_defer(struct hrtimer *timer)
  23401. +{
  23402. + if (timer->irqsafe)
  23403. + return 0;
  23404. +
  23405. + __remove_hrtimer(timer, timer->base, timer->state, 0);
  23406. + list_add_tail(&timer->cb_entry, &timer->base->expired);
  23407. + return 1;
  23408. +}
  23409. +
  23410. +#else
  23411. +
  23412. +static inline int hrtimer_rt_defer(struct hrtimer *timer) { return 0; }
  23413. +
  23414. +#endif
  23415. +
  23416. +static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer);
  23417. +
  23418. static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now)
  23419. {
  23420. struct hrtimer_clock_base *base = cpu_base->clock_base;
  23421. unsigned int active = cpu_base->active_bases;
  23422. + int raise = 0;
  23423. for (; active; base++, active >>= 1) {
  23424. struct timerqueue_node *node;
  23425. @@ -1299,6 +1481,15 @@
  23426. timer = container_of(node, struct hrtimer, node);
  23427. + trace_hrtimer_interrupt(raw_smp_processor_id(),
  23428. + ktime_to_ns(ktime_sub(ktime_to_ns(timer->praecox) ?
  23429. + timer->praecox : hrtimer_get_expires(timer),
  23430. + basenow)),
  23431. + current,
  23432. + timer->function == hrtimer_wakeup ?
  23433. + container_of(timer, struct hrtimer_sleeper,
  23434. + timer)->task : NULL);
  23435. +
  23436. /*
  23437. * The immediate goal for using the softexpires is
  23438. * minimizing wakeups, not running timers at the
  23439. @@ -1314,9 +1505,14 @@
  23440. if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer))
  23441. break;
  23442. - __run_hrtimer(cpu_base, base, timer, &basenow);
  23443. + if (!hrtimer_rt_defer(timer))
  23444. + __run_hrtimer(cpu_base, base, timer, &basenow);
  23445. + else
  23446. + raise = 1;
  23447. }
  23448. }
  23449. + if (raise)
  23450. + raise_softirq_irqoff(HRTIMER_SOFTIRQ);
  23451. }
  23452. #ifdef CONFIG_HIGH_RES_TIMERS
  23453. @@ -1479,16 +1675,18 @@
  23454. void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
  23455. {
  23456. sl->timer.function = hrtimer_wakeup;
  23457. + sl->timer.irqsafe = 1;
  23458. sl->task = task;
  23459. }
  23460. EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
  23461. -static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
  23462. +static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode,
  23463. + unsigned long state)
  23464. {
  23465. hrtimer_init_sleeper(t, current);
  23466. do {
  23467. - set_current_state(TASK_INTERRUPTIBLE);
  23468. + set_current_state(state);
  23469. hrtimer_start_expires(&t->timer, mode);
  23470. if (likely(t->task))
  23471. @@ -1530,7 +1728,8 @@
  23472. HRTIMER_MODE_ABS);
  23473. hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
  23474. - if (do_nanosleep(&t, HRTIMER_MODE_ABS))
  23475. + /* cpu_chill() does not care about restart state. */
  23476. + if (do_nanosleep(&t, HRTIMER_MODE_ABS, TASK_INTERRUPTIBLE))
  23477. goto out;
  23478. rmtp = restart->nanosleep.rmtp;
  23479. @@ -1547,8 +1746,10 @@
  23480. return ret;
  23481. }
  23482. -long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
  23483. - const enum hrtimer_mode mode, const clockid_t clockid)
  23484. +static long
  23485. +__hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
  23486. + const enum hrtimer_mode mode, const clockid_t clockid,
  23487. + unsigned long state)
  23488. {
  23489. struct restart_block *restart;
  23490. struct hrtimer_sleeper t;
  23491. @@ -1561,7 +1762,7 @@
  23492. hrtimer_init_on_stack(&t.timer, clockid, mode);
  23493. hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack);
  23494. - if (do_nanosleep(&t, mode))
  23495. + if (do_nanosleep(&t, mode, state))
  23496. goto out;
  23497. /* Absolute timers do not update the rmtp value and restart: */
  23498. @@ -1588,6 +1789,12 @@
  23499. return ret;
  23500. }
  23501. +long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
  23502. + const enum hrtimer_mode mode, const clockid_t clockid)
  23503. +{
  23504. + return __hrtimer_nanosleep(rqtp, rmtp, mode, clockid, TASK_INTERRUPTIBLE);
  23505. +}
  23506. +
  23507. SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp,
  23508. struct timespec __user *, rmtp)
  23509. {
  23510. @@ -1602,6 +1809,26 @@
  23511. return hrtimer_nanosleep(&tu, rmtp, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
  23512. }
  23513. +#ifdef CONFIG_PREEMPT_RT_FULL
  23514. +/*
  23515. + * Sleep for 1 ms in the hope that whoever holds what we want will let it go.
  23516. + */
  23517. +void cpu_chill(void)
  23518. +{
  23519. + struct timespec tu = {
  23520. + .tv_nsec = NSEC_PER_MSEC,
  23521. + };
  23522. + unsigned int freeze_flag = current->flags & PF_NOFREEZE;
  23523. +
  23524. + current->flags |= PF_NOFREEZE;
  23525. + __hrtimer_nanosleep(&tu, NULL, HRTIMER_MODE_REL, CLOCK_MONOTONIC,
  23526. + TASK_UNINTERRUPTIBLE);
  23527. + if (!freeze_flag)
  23528. + current->flags &= ~PF_NOFREEZE;
  23529. +}
  23530. +EXPORT_SYMBOL(cpu_chill);
  23531. +#endif
  23532. +
  23533. /*
  23534. * Functions related to boot-time initialization:
  23535. */
  23536. @@ -1613,10 +1840,14 @@
  23537. for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
  23538. cpu_base->clock_base[i].cpu_base = cpu_base;
  23539. timerqueue_init_head(&cpu_base->clock_base[i].active);
  23540. + INIT_LIST_HEAD(&cpu_base->clock_base[i].expired);
  23541. }
  23542. cpu_base->cpu = cpu;
  23543. hrtimer_init_hres(cpu_base);
  23544. +#ifdef CONFIG_PREEMPT_RT_BASE
  23545. + init_waitqueue_head(&cpu_base->wait);
  23546. +#endif
  23547. }
  23548. #ifdef CONFIG_HOTPLUG_CPU
  23549. @@ -1714,11 +1945,21 @@
  23550. .notifier_call = hrtimer_cpu_notify,
  23551. };
  23552. +#ifdef CONFIG_PREEMPT_RT_BASE
  23553. +static void run_hrtimer_softirq(struct softirq_action *h)
  23554. +{
  23555. + hrtimer_rt_run_pending();
  23556. +}
  23557. +#endif
  23558. +
  23559. void __init hrtimers_init(void)
  23560. {
  23561. hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
  23562. (void *)(long)smp_processor_id());
  23563. register_cpu_notifier(&hrtimers_nb);
  23564. +#ifdef CONFIG_PREEMPT_RT_BASE
  23565. + open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);
  23566. +#endif
  23567. }
  23568. /**
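
cpu_chill(), added for PREEMPT_RT_FULL in the hrtimer.c hunks above, turns cpu_relax()-style retry loops into a 1 ms uninterruptible sleep so that whoever holds the contended resource gets a chance to run. A minimal userspace approximation of that retry pattern is sketched below; try_acquire() is a made-up stand-in for whatever operation keeps reporting "try again", and clock_nanosleep() stands in for the in-kernel sleep.

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

/* Placeholder for an operation that can transiently fail (e.g. a trylock). */
static int attempts;
static bool try_acquire(void)
{
	return ++attempts >= 3;	/* pretend it succeeds on the third try */
}

static void acquire_with_chill(void)
{
	const struct timespec chill = { .tv_sec = 0, .tv_nsec = 1000000 }; /* 1 ms */

	while (!try_acquire()) {
		/* Sleep rather than spin, mirroring cpu_chill() on RT. */
		clock_nanosleep(CLOCK_MONOTONIC, 0, &chill, NULL);
	}
}

int main(void)
{
	acquire_with_chill();
	printf("acquired after %d attempts\n", attempts);
	return 0;
}
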
  23569. diff -Nur linux-4.4.62.orig/kernel/time/itimer.c linux-4.4.62/kernel/time/itimer.c
  23570. --- linux-4.4.62.orig/kernel/time/itimer.c 2017-04-18 07:15:37.000000000 +0200
  23571. +++ linux-4.4.62/kernel/time/itimer.c 2017-04-18 17:38:08.222650330 +0200
  23572. @@ -213,6 +213,7 @@
  23573. /* We are sharing ->siglock with it_real_fn() */
  23574. if (hrtimer_try_to_cancel(timer) < 0) {
  23575. spin_unlock_irq(&tsk->sighand->siglock);
  23576. + hrtimer_wait_for_timer(&tsk->signal->real_timer);
  23577. goto again;
  23578. }
  23579. expires = timeval_to_ktime(value->it_value);
  23580. diff -Nur linux-4.4.62.orig/kernel/time/jiffies.c linux-4.4.62/kernel/time/jiffies.c
  23581. --- linux-4.4.62.orig/kernel/time/jiffies.c 2017-04-18 07:15:37.000000000 +0200
  23582. +++ linux-4.4.62/kernel/time/jiffies.c 2017-04-18 17:38:08.222650330 +0200
  23583. @@ -74,7 +74,8 @@
  23584. .max_cycles = 10,
  23585. };
  23586. -__cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock);
  23587. +__cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(jiffies_lock);
  23588. +__cacheline_aligned_in_smp seqcount_t jiffies_seq;
  23589. #if (BITS_PER_LONG < 64)
  23590. u64 get_jiffies_64(void)
  23591. @@ -83,9 +84,9 @@
  23592. u64 ret;
  23593. do {
  23594. - seq = read_seqbegin(&jiffies_lock);
  23595. + seq = read_seqcount_begin(&jiffies_seq);
  23596. ret = jiffies_64;
  23597. - } while (read_seqretry(&jiffies_lock, seq));
  23598. + } while (read_seqcount_retry(&jiffies_seq, seq));
  23599. return ret;
  23600. }
  23601. EXPORT_SYMBOL(get_jiffies_64);
  23602. diff -Nur linux-4.4.62.orig/kernel/time/ntp.c linux-4.4.62/kernel/time/ntp.c
  23603. --- linux-4.4.62.orig/kernel/time/ntp.c 2017-04-18 07:15:37.000000000 +0200
  23604. +++ linux-4.4.62/kernel/time/ntp.c 2017-04-18 17:38:08.222650330 +0200
  23605. @@ -10,6 +10,7 @@
  23606. #include <linux/workqueue.h>
  23607. #include <linux/hrtimer.h>
  23608. #include <linux/jiffies.h>
  23609. +#include <linux/kthread.h>
  23610. #include <linux/math64.h>
  23611. #include <linux/timex.h>
  23612. #include <linux/time.h>
  23613. @@ -562,10 +563,52 @@
  23614. &sync_cmos_work, timespec64_to_jiffies(&next));
  23615. }
  23616. +#ifdef CONFIG_PREEMPT_RT_FULL
  23617. +/*
  23618. + * RT can not call schedule_delayed_work from real interrupt context.
  23619. + * Need to make a thread to do the real work.
  23620. + */
  23621. +static struct task_struct *cmos_delay_thread;
  23622. +static bool do_cmos_delay;
  23623. +
  23624. +static int run_cmos_delay(void *ignore)
  23625. +{
  23626. + while (!kthread_should_stop()) {
  23627. + set_current_state(TASK_INTERRUPTIBLE);
  23628. + if (do_cmos_delay) {
  23629. + do_cmos_delay = false;
  23630. + queue_delayed_work(system_power_efficient_wq,
  23631. + &sync_cmos_work, 0);
  23632. + }
  23633. + schedule();
  23634. + }
  23635. + __set_current_state(TASK_RUNNING);
  23636. + return 0;
  23637. +}
  23638. +
  23639. +void ntp_notify_cmos_timer(void)
  23640. +{
  23641. + do_cmos_delay = true;
  23642. + /* Make visible before waking up process */
  23643. + smp_wmb();
  23644. + wake_up_process(cmos_delay_thread);
  23645. +}
  23646. +
  23647. +static __init int create_cmos_delay_thread(void)
  23648. +{
  23649. + cmos_delay_thread = kthread_run(run_cmos_delay, NULL, "kcmosdelayd");
  23650. + BUG_ON(!cmos_delay_thread);
  23651. + return 0;
  23652. +}
  23653. +early_initcall(create_cmos_delay_thread);
  23654. +
  23655. +#else
  23656. +
  23657. void ntp_notify_cmos_timer(void)
  23658. {
  23659. queue_delayed_work(system_power_efficient_wq, &sync_cmos_work, 0);
  23660. }
  23661. +#endif /* CONFIG_PREEMPT_RT_FULL */
  23662. #else
  23663. void ntp_notify_cmos_timer(void) { }
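
The hrtimer.c and ntp.c hunks use the same RT idiom: work that a hard-interrupt handler must not start directly (schedule_work()/queue_delayed_work()) is recorded in a flag and a dedicated kthread is woken to issue it from process context. The sketch below is a condensed userspace model of that flag-plus-wakeup handshake, with a pthread condition variable in place of wake_up_process(); all names are illustrative only.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  wake = PTHREAD_COND_INITIALIZER;
static bool do_deferred_work;	/* models do_clock_set_delay / do_cmos_delay */
static bool stop;

static void *helper_thread(void *unused)
{
	(void)unused;
	pthread_mutex_lock(&lock);
	for (;;) {
		while (!do_deferred_work && !stop)
			pthread_cond_wait(&wake, &lock);
		if (do_deferred_work) {
			do_deferred_work = false;
			/* The real helpers call schedule_work() here. */
			printf("deferred work executed in thread context\n");
			continue;
		}
		break;	/* stop requested and nothing pending */
	}
	pthread_mutex_unlock(&lock);
	return NULL;
}

/* Models clock_was_set_delayed()/ntp_notify_cmos_timer() on RT: set the flag
 * and wake the helper instead of doing the work in the caller's context. */
static void notify_helper(void)
{
	pthread_mutex_lock(&lock);
	do_deferred_work = true;
	pthread_cond_signal(&wake);
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	pthread_t tid;

	pthread_create(&tid, NULL, helper_thread, NULL);
	notify_helper();

	pthread_mutex_lock(&lock);
	stop = true;
	pthread_cond_signal(&wake);
	pthread_mutex_unlock(&lock);

	pthread_join(tid, NULL);
	return 0;
}
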
  23664. diff -Nur linux-4.4.62.orig/kernel/time/posix-cpu-timers.c linux-4.4.62/kernel/time/posix-cpu-timers.c
  23665. --- linux-4.4.62.orig/kernel/time/posix-cpu-timers.c 2017-04-18 07:15:37.000000000 +0200
  23666. +++ linux-4.4.62/kernel/time/posix-cpu-timers.c 2017-04-18 17:38:08.222650330 +0200
  23667. @@ -3,6 +3,7 @@
  23668. */
  23669. #include <linux/sched.h>
  23670. +#include <linux/sched/rt.h>
  23671. #include <linux/posix-timers.h>
  23672. #include <linux/errno.h>
  23673. #include <linux/math64.h>
  23674. @@ -650,7 +651,7 @@
  23675. /*
  23676. * Disarm any old timer after extracting its expiry time.
  23677. */
  23678. - WARN_ON_ONCE(!irqs_disabled());
  23679. + WARN_ON_ONCE_NONRT(!irqs_disabled());
  23680. ret = 0;
  23681. old_incr = timer->it.cpu.incr;
  23682. @@ -1092,7 +1093,7 @@
  23683. /*
  23684. * Now re-arm for the new expiry time.
  23685. */
  23686. - WARN_ON_ONCE(!irqs_disabled());
  23687. + WARN_ON_ONCE_NONRT(!irqs_disabled());
  23688. arm_timer(timer);
  23689. unlock_task_sighand(p, &flags);
  23690. @@ -1183,13 +1184,13 @@
  23691. * already updated our counts. We need to check if any timers fire now.
  23692. * Interrupts are disabled.
  23693. */
  23694. -void run_posix_cpu_timers(struct task_struct *tsk)
  23695. +static void __run_posix_cpu_timers(struct task_struct *tsk)
  23696. {
  23697. LIST_HEAD(firing);
  23698. struct k_itimer *timer, *next;
  23699. unsigned long flags;
  23700. - WARN_ON_ONCE(!irqs_disabled());
  23701. + WARN_ON_ONCE_NONRT(!irqs_disabled());
  23702. /*
  23703. * The fast path checks that there are no expired thread or thread
  23704. @@ -1243,6 +1244,190 @@
  23705. }
  23706. }
  23707. +#ifdef CONFIG_PREEMPT_RT_BASE
  23708. +#include <linux/kthread.h>
  23709. +#include <linux/cpu.h>
  23710. +DEFINE_PER_CPU(struct task_struct *, posix_timer_task);
  23711. +DEFINE_PER_CPU(struct task_struct *, posix_timer_tasklist);
  23712. +
  23713. +static int posix_cpu_timers_thread(void *data)
  23714. +{
  23715. + int cpu = (long)data;
  23716. +
  23717. + BUG_ON(per_cpu(posix_timer_task,cpu) != current);
  23718. +
  23719. + while (!kthread_should_stop()) {
  23720. + struct task_struct *tsk = NULL;
  23721. + struct task_struct *next = NULL;
  23722. +
  23723. + if (cpu_is_offline(cpu))
  23724. + goto wait_to_die;
  23725. +
  23726. + /* grab task list */
  23727. + raw_local_irq_disable();
  23728. + tsk = per_cpu(posix_timer_tasklist, cpu);
  23729. + per_cpu(posix_timer_tasklist, cpu) = NULL;
  23730. + raw_local_irq_enable();
  23731. +
  23732. + /* it's possible the list is empty, just return */
  23733. + if (!tsk) {
  23734. + set_current_state(TASK_INTERRUPTIBLE);
  23735. + schedule();
  23736. + __set_current_state(TASK_RUNNING);
  23737. + continue;
  23738. + }
  23739. +
  23740. + /* Process task list */
  23741. + while (1) {
  23742. + /* save next */
  23743. + next = tsk->posix_timer_list;
  23744. +
  23745. + /* run the task timers, clear its ptr and
  23746. + * unreference it
  23747. + */
  23748. + __run_posix_cpu_timers(tsk);
  23749. + tsk->posix_timer_list = NULL;
  23750. + put_task_struct(tsk);
  23751. +
  23752. + /* check if this is the last on the list */
  23753. + if (next == tsk)
  23754. + break;
  23755. + tsk = next;
  23756. + }
  23757. + }
  23758. + return 0;
  23759. +
  23760. +wait_to_die:
  23761. + /* Wait for kthread_stop */
  23762. + set_current_state(TASK_INTERRUPTIBLE);
  23763. + while (!kthread_should_stop()) {
  23764. + schedule();
  23765. + set_current_state(TASK_INTERRUPTIBLE);
  23766. + }
  23767. + __set_current_state(TASK_RUNNING);
  23768. + return 0;
  23769. +}
  23770. +
  23771. +static inline int __fastpath_timer_check(struct task_struct *tsk)
  23772. +{
  23773. + /* tsk == current, ensure it is safe to use ->signal/sighand */
  23774. + if (unlikely(tsk->exit_state))
  23775. + return 0;
  23776. +
  23777. + if (!task_cputime_zero(&tsk->cputime_expires))
  23778. + return 1;
  23779. +
  23780. + if (!task_cputime_zero(&tsk->signal->cputime_expires))
  23781. + return 1;
  23782. +
  23783. + return 0;
  23784. +}
  23785. +
  23786. +void run_posix_cpu_timers(struct task_struct *tsk)
  23787. +{
  23788. + unsigned long cpu = smp_processor_id();
  23789. + struct task_struct *tasklist;
  23790. +
  23791. + BUG_ON(!irqs_disabled());
  23792. + if(!per_cpu(posix_timer_task, cpu))
  23793. + return;
  23794. + /* get per-cpu references */
  23795. + tasklist = per_cpu(posix_timer_tasklist, cpu);
  23796. +
  23797. + /* check to see if we're already queued */
  23798. + if (!tsk->posix_timer_list && __fastpath_timer_check(tsk)) {
  23799. + get_task_struct(tsk);
  23800. + if (tasklist) {
  23801. + tsk->posix_timer_list = tasklist;
  23802. + } else {
  23803. + /*
  23804. + * The list is terminated by a self-pointing
  23805. + * task_struct
  23806. + */
  23807. + tsk->posix_timer_list = tsk;
  23808. + }
  23809. + per_cpu(posix_timer_tasklist, cpu) = tsk;
  23810. +
  23811. + wake_up_process(per_cpu(posix_timer_task, cpu));
  23812. + }
  23813. +}
  23814. +
  23815. +/*
  23816. + * posix_cpu_thread_call - callback that gets triggered when a CPU is added.
  23817. + * Here we can start up the necessary posix timer thread for the new CPU.
  23818. + */
  23819. +static int posix_cpu_thread_call(struct notifier_block *nfb,
  23820. + unsigned long action, void *hcpu)
  23821. +{
  23822. + int cpu = (long)hcpu;
  23823. + struct task_struct *p;
  23824. + struct sched_param param;
  23825. +
  23826. + switch (action) {
  23827. + case CPU_UP_PREPARE:
  23828. + p = kthread_create(posix_cpu_timers_thread, hcpu,
  23829. + "posixcputmr/%d",cpu);
  23830. + if (IS_ERR(p))
  23831. + return NOTIFY_BAD;
  23832. + p->flags |= PF_NOFREEZE;
  23833. + kthread_bind(p, cpu);
  23834. + /* Must be high prio to avoid getting starved */
  23835. + param.sched_priority = MAX_RT_PRIO-1;
  23836. + sched_setscheduler(p, SCHED_FIFO, &param);
  23837. + per_cpu(posix_timer_task,cpu) = p;
  23838. + break;
  23839. + case CPU_ONLINE:
  23840. + /* Strictly unnecessary, as first user will wake it. */
  23841. + wake_up_process(per_cpu(posix_timer_task,cpu));
  23842. + break;
  23843. +#ifdef CONFIG_HOTPLUG_CPU
  23844. + case CPU_UP_CANCELED:
  23845. + /* Unbind it from offline cpu so it can run. Fall thru. */
  23846. + kthread_bind(per_cpu(posix_timer_task, cpu),
  23847. + cpumask_any(cpu_online_mask));
  23848. + kthread_stop(per_cpu(posix_timer_task,cpu));
  23849. + per_cpu(posix_timer_task,cpu) = NULL;
  23850. + break;
  23851. + case CPU_DEAD:
  23852. + kthread_stop(per_cpu(posix_timer_task,cpu));
  23853. + per_cpu(posix_timer_task,cpu) = NULL;
  23854. + break;
  23855. +#endif
  23856. + }
  23857. + return NOTIFY_OK;
  23858. +}
  23859. +
  23860. +/* Register at highest priority so that task migration (migrate_all_tasks)
  23861. + * happens before everything else.
  23862. + */
  23863. +static struct notifier_block posix_cpu_thread_notifier = {
  23864. + .notifier_call = posix_cpu_thread_call,
  23865. + .priority = 10
  23866. +};
  23867. +
  23868. +static int __init posix_cpu_thread_init(void)
  23869. +{
  23870. + void *hcpu = (void *)(long)smp_processor_id();
  23871. + /* Start one for boot CPU. */
  23872. + unsigned long cpu;
  23873. +
  23874. + /* init the per-cpu posix_timer_tasklets */
  23875. + for_each_possible_cpu(cpu)
  23876. + per_cpu(posix_timer_tasklist, cpu) = NULL;
  23877. +
  23878. + posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_UP_PREPARE, hcpu);
  23879. + posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_ONLINE, hcpu);
  23880. + register_cpu_notifier(&posix_cpu_thread_notifier);
  23881. + return 0;
  23882. +}
  23883. +early_initcall(posix_cpu_thread_init);
  23884. +#else /* CONFIG_PREEMPT_RT_BASE */
  23885. +void run_posix_cpu_timers(struct task_struct *tsk)
  23886. +{
  23887. + __run_posix_cpu_timers(tsk);
  23888. +}
  23889. +#endif /* CONFIG_PREEMPT_RT_BASE */
  23890. +
  23891. /*
  23892. * Set one of the process-wide special case CPU timers or RLIMIT_CPU.
  23893. * The tsk->sighand->siglock must be held by the caller.
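Editor's note on the hunk above: run_posix_cpu_timers() on PREEMPT_RT only queues the task onto a per-CPU singly linked list threaded through ->posix_timer_list, where an empty slot is NULL and the tail entry points to itself, then wakes the posixcputmr/<cpu> kthread created in posix_cpu_thread_call(). The following is a minimal, hypothetical walker written purely for illustration (it is not part of the patch; the real consumer is the posixcputmr kthread), showing how such a self-terminated list is drained:

/* Hypothetical sketch only: draining a per-CPU posix_timer_tasklist as
 * built by the hunk above. Names are taken from the patch; the function
 * itself is illustrative and not from the patch. */
static void drain_posix_timer_tasklist(int cpu)
{
	struct task_struct *tsk = per_cpu(posix_timer_tasklist, cpu);

	per_cpu(posix_timer_tasklist, cpu) = NULL;
	while (tsk) {
		/* read the link before breaking it */
		struct task_struct *next = tsk->posix_timer_list;

		/* ... run the expired CPU timers for tsk here ... */

		tsk->posix_timer_list = NULL;	/* allow tsk to be queued again */
		put_task_struct(tsk);		/* drop the ref taken at queue time */

		/* the tail entry points to itself */
		tsk = (next == tsk) ? NULL : next;
	}
}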
  23894. diff -Nur linux-4.4.62.orig/kernel/time/posix-timers.c linux-4.4.62/kernel/time/posix-timers.c
  23895. --- linux-4.4.62.orig/kernel/time/posix-timers.c 2017-04-18 07:15:37.000000000 +0200
  23896. +++ linux-4.4.62/kernel/time/posix-timers.c 2017-04-18 17:38:08.222650330 +0200
  23897. @@ -506,6 +506,7 @@
  23898. static struct pid *good_sigevent(sigevent_t * event)
  23899. {
  23900. struct task_struct *rtn = current->group_leader;
  23901. + int sig = event->sigev_signo;
  23902. if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
  23903. (!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) ||
  23904. @@ -514,7 +515,8 @@
  23905. return NULL;
  23906. if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) &&
  23907. - ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX)))
  23908. + (sig <= 0 || sig > SIGRTMAX || sig_kernel_only(sig) ||
  23909. + sig_kernel_coredump(sig)))
  23910. return NULL;
  23911. return task_pid(rtn);
  23912. @@ -826,6 +828,20 @@
  23913. return overrun;
  23914. }
  23915. +/*
  23916. + * Protected by RCU!
  23917. + */
  23918. +static void timer_wait_for_callback(struct k_clock *kc, struct k_itimer *timr)
  23919. +{
  23920. +#ifdef CONFIG_PREEMPT_RT_FULL
  23921. + if (kc->timer_set == common_timer_set)
  23922. + hrtimer_wait_for_timer(&timr->it.real.timer);
  23923. + else
  23924. + /* FIXME: Whacky hack for posix-cpu-timers */
  23925. + schedule_timeout(1);
  23926. +#endif
  23927. +}
  23928. +
  23929. /* Set a POSIX.1b interval timer. */
  23930. /* timr->it_lock is taken. */
  23931. static int
  23932. @@ -903,6 +919,7 @@
  23933. if (!timr)
  23934. return -EINVAL;
  23935. + rcu_read_lock();
  23936. kc = clockid_to_kclock(timr->it_clock);
  23937. if (WARN_ON_ONCE(!kc || !kc->timer_set))
  23938. error = -EINVAL;
  23939. @@ -911,9 +928,12 @@
  23940. unlock_timer(timr, flag);
  23941. if (error == TIMER_RETRY) {
  23942. + timer_wait_for_callback(kc, timr);
  23943. rtn = NULL; // We already got the old time...
  23944. + rcu_read_unlock();
  23945. goto retry;
  23946. }
  23947. + rcu_read_unlock();
  23948. if (old_setting && !error &&
  23949. copy_to_user(old_setting, &old_spec, sizeof (old_spec)))
  23950. @@ -951,10 +971,15 @@
  23951. if (!timer)
  23952. return -EINVAL;
  23953. + rcu_read_lock();
  23954. if (timer_delete_hook(timer) == TIMER_RETRY) {
  23955. unlock_timer(timer, flags);
  23956. + timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
  23957. + timer);
  23958. + rcu_read_unlock();
  23959. goto retry_delete;
  23960. }
  23961. + rcu_read_unlock();
  23962. spin_lock(&current->sighand->siglock);
  23963. list_del(&timer->list);
  23964. @@ -980,8 +1005,18 @@
  23965. retry_delete:
  23966. spin_lock_irqsave(&timer->it_lock, flags);
  23967. + /* On RT we can race with a deletion */
  23968. + if (!timer->it_signal) {
  23969. + unlock_timer(timer, flags);
  23970. + return;
  23971. + }
  23972. +
  23973. if (timer_delete_hook(timer) == TIMER_RETRY) {
  23974. + rcu_read_lock();
  23975. unlock_timer(timer, flags);
  23976. + timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
  23977. + timer);
  23978. + rcu_read_unlock();
  23979. goto retry_delete;
  23980. }
  23981. list_del(&timer->list);
  23982. diff -Nur linux-4.4.62.orig/kernel/time/tick-broadcast-hrtimer.c linux-4.4.62/kernel/time/tick-broadcast-hrtimer.c
  23983. --- linux-4.4.62.orig/kernel/time/tick-broadcast-hrtimer.c 2017-04-18 07:15:37.000000000 +0200
  23984. +++ linux-4.4.62/kernel/time/tick-broadcast-hrtimer.c 2017-04-18 17:38:08.222650330 +0200
  23985. @@ -106,5 +106,6 @@
  23986. {
  23987. hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
  23988. bctimer.function = bc_handler;
  23989. + bctimer.irqsafe = true;
  23990. clockevents_register_device(&ce_broadcast_hrtimer);
  23991. }
  23992. diff -Nur linux-4.4.62.orig/kernel/time/tick-common.c linux-4.4.62/kernel/time/tick-common.c
  23993. --- linux-4.4.62.orig/kernel/time/tick-common.c 2017-04-18 07:15:37.000000000 +0200
  23994. +++ linux-4.4.62/kernel/time/tick-common.c 2017-04-18 17:38:08.222650330 +0200
  23995. @@ -79,13 +79,15 @@
  23996. static void tick_periodic(int cpu)
  23997. {
  23998. if (tick_do_timer_cpu == cpu) {
  23999. - write_seqlock(&jiffies_lock);
  24000. + raw_spin_lock(&jiffies_lock);
  24001. + write_seqcount_begin(&jiffies_seq);
  24002. /* Keep track of the next tick event */
  24003. tick_next_period = ktime_add(tick_next_period, tick_period);
  24004. do_timer(1);
  24005. - write_sequnlock(&jiffies_lock);
  24006. + write_seqcount_end(&jiffies_seq);
  24007. + raw_spin_unlock(&jiffies_lock);
  24008. update_wall_time();
  24009. }
  24010. @@ -157,9 +159,9 @@
  24011. ktime_t next;
  24012. do {
  24013. - seq = read_seqbegin(&jiffies_lock);
  24014. + seq = read_seqcount_begin(&jiffies_seq);
  24015. next = tick_next_period;
  24016. - } while (read_seqretry(&jiffies_lock, seq));
  24017. + } while (read_seqcount_retry(&jiffies_seq, seq));
  24018. clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
  24019. diff -Nur linux-4.4.62.orig/kernel/time/tick-sched.c linux-4.4.62/kernel/time/tick-sched.c
  24020. --- linux-4.4.62.orig/kernel/time/tick-sched.c 2017-04-18 07:15:37.000000000 +0200
  24021. +++ linux-4.4.62/kernel/time/tick-sched.c 2017-04-18 17:38:08.222650330 +0200
  24022. @@ -62,7 +62,8 @@
  24023. return;
  24024. /* Reevalute with jiffies_lock held */
  24025. - write_seqlock(&jiffies_lock);
  24026. + raw_spin_lock(&jiffies_lock);
  24027. + write_seqcount_begin(&jiffies_seq);
  24028. delta = ktime_sub(now, last_jiffies_update);
  24029. if (delta.tv64 >= tick_period.tv64) {
  24030. @@ -85,10 +86,12 @@
  24031. /* Keep the tick_next_period variable up to date */
  24032. tick_next_period = ktime_add(last_jiffies_update, tick_period);
  24033. } else {
  24034. - write_sequnlock(&jiffies_lock);
  24035. + write_seqcount_end(&jiffies_seq);
  24036. + raw_spin_unlock(&jiffies_lock);
  24037. return;
  24038. }
  24039. - write_sequnlock(&jiffies_lock);
  24040. + write_seqcount_end(&jiffies_seq);
  24041. + raw_spin_unlock(&jiffies_lock);
  24042. update_wall_time();
  24043. }
  24044. @@ -99,12 +102,14 @@
  24045. {
  24046. ktime_t period;
  24047. - write_seqlock(&jiffies_lock);
  24048. + raw_spin_lock(&jiffies_lock);
  24049. + write_seqcount_begin(&jiffies_seq);
  24050. /* Did we start the jiffies update yet ? */
  24051. if (last_jiffies_update.tv64 == 0)
  24052. last_jiffies_update = tick_next_period;
  24053. period = last_jiffies_update;
  24054. - write_sequnlock(&jiffies_lock);
  24055. + write_seqcount_end(&jiffies_seq);
  24056. + raw_spin_unlock(&jiffies_lock);
  24057. return period;
  24058. }
  24059. @@ -176,6 +181,11 @@
  24060. return false;
  24061. }
  24062. + if (!arch_irq_work_has_interrupt()) {
  24063. + trace_tick_stop(0, "missing irq work interrupt\n");
  24064. + return false;
  24065. + }
  24066. +
  24067. /* sched_clock_tick() needs us? */
  24068. #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
  24069. /*
  24070. @@ -204,6 +214,7 @@
  24071. static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
  24072. .func = nohz_full_kick_work_func,
  24073. + .flags = IRQ_WORK_HARD_IRQ,
  24074. };
  24075. /*
  24076. @@ -578,10 +589,10 @@
  24077. /* Read jiffies and the time when jiffies were updated last */
  24078. do {
  24079. - seq = read_seqbegin(&jiffies_lock);
  24080. + seq = read_seqcount_begin(&jiffies_seq);
  24081. basemono = last_jiffies_update.tv64;
  24082. basejiff = jiffies;
  24083. - } while (read_seqretry(&jiffies_lock, seq));
  24084. + } while (read_seqcount_retry(&jiffies_seq, seq));
  24085. ts->last_jiffies = basejiff;
  24086. if (rcu_needs_cpu(basemono, &next_rcu) ||
  24087. @@ -753,14 +764,7 @@
  24088. return false;
  24089. if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
  24090. - static int ratelimit;
  24091. -
  24092. - if (ratelimit < 10 &&
  24093. - (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
  24094. - pr_warn("NOHZ: local_softirq_pending %02x\n",
  24095. - (unsigned int) local_softirq_pending());
  24096. - ratelimit++;
  24097. - }
  24098. + softirq_check_pending_idle();
  24099. return false;
  24100. }
  24101. @@ -1100,6 +1104,7 @@
  24102. * Emulate tick processing via per-CPU hrtimers:
  24103. */
  24104. hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
  24105. + ts->sched_timer.irqsafe = 1;
  24106. ts->sched_timer.function = tick_sched_timer;
  24107. /* Get the next period (per cpu) */
  24108. diff -Nur linux-4.4.62.orig/kernel/time/timekeeping.c linux-4.4.62/kernel/time/timekeeping.c
  24109. --- linux-4.4.62.orig/kernel/time/timekeeping.c 2017-04-18 07:15:37.000000000 +0200
  24110. +++ linux-4.4.62/kernel/time/timekeeping.c 2017-04-18 17:38:08.222650330 +0200
  24111. @@ -2070,8 +2070,10 @@
  24112. */
  24113. void xtime_update(unsigned long ticks)
  24114. {
  24115. - write_seqlock(&jiffies_lock);
  24116. + raw_spin_lock(&jiffies_lock);
  24117. + write_seqcount_begin(&jiffies_seq);
  24118. do_timer(ticks);
  24119. - write_sequnlock(&jiffies_lock);
  24120. + write_seqcount_end(&jiffies_seq);
  24121. + raw_spin_unlock(&jiffies_lock);
  24122. update_wall_time();
  24123. }
  24124. diff -Nur linux-4.4.62.orig/kernel/time/timekeeping.h linux-4.4.62/kernel/time/timekeeping.h
  24125. --- linux-4.4.62.orig/kernel/time/timekeeping.h 2017-04-18 07:15:37.000000000 +0200
  24126. +++ linux-4.4.62/kernel/time/timekeeping.h 2017-04-18 17:38:08.222650330 +0200
  24127. @@ -19,7 +19,8 @@
  24128. extern void do_timer(unsigned long ticks);
  24129. extern void update_wall_time(void);
  24130. -extern seqlock_t jiffies_lock;
  24131. +extern raw_spinlock_t jiffies_lock;
  24132. +extern seqcount_t jiffies_seq;
  24133. #define CS_NAME_LEN 32
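Taken together, the tick-common.c, tick-sched.c, timekeeping.c and timekeeping.h hunks above replace the seqlock_t jiffies_lock with a raw spinlock plus a bare seqcount, because on PREEMPT_RT a seqlock's embedded spinlock becomes a sleeping lock while readers still spin on the sequence counter. The sketch below restates that writer/reader pattern in one place; it is illustrative only (the real objects are the externs declared above, defined elsewhere in this patch), with hypothetical example_* names:

/* Illustrative only: the pattern jiffies_lock is converted to above. */
static DEFINE_RAW_SPINLOCK(example_lock);
static seqcount_t example_seq = SEQCNT_ZERO(example_seq);
static u64 example_value;

static void example_writer(u64 v)
{
	raw_spin_lock(&example_lock);		/* serializes writers, never sleeps */
	write_seqcount_begin(&example_seq);
	example_value = v;
	write_seqcount_end(&example_seq);
	raw_spin_unlock(&example_lock);
}

static u64 example_reader(void)
{
	unsigned int seq;
	u64 v;

	do {					/* lockless retry loop */
		seq = read_seqcount_begin(&example_seq);
		v = example_value;
	} while (read_seqcount_retry(&example_seq, seq));
	return v;
}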
  24134. diff -Nur linux-4.4.62.orig/kernel/time/timer.c linux-4.4.62/kernel/time/timer.c
  24135. --- linux-4.4.62.orig/kernel/time/timer.c 2017-04-18 07:15:37.000000000 +0200
  24136. +++ linux-4.4.62/kernel/time/timer.c 2017-04-18 17:38:08.226650485 +0200
  24137. @@ -80,6 +80,9 @@
  24138. struct tvec_base {
  24139. spinlock_t lock;
  24140. struct timer_list *running_timer;
  24141. +#ifdef CONFIG_PREEMPT_RT_FULL
  24142. + wait_queue_head_t wait_for_running_timer;
  24143. +#endif
  24144. unsigned long timer_jiffies;
  24145. unsigned long next_timer;
  24146. unsigned long active_timers;
  24147. @@ -777,6 +780,39 @@
  24148. cpu_relax();
  24149. }
  24150. }
  24151. +#ifdef CONFIG_PREEMPT_RT_FULL
  24152. +static inline struct tvec_base *switch_timer_base(struct timer_list *timer,
  24153. + struct tvec_base *old,
  24154. + struct tvec_base *new)
  24155. +{
  24156. + /*
  24157. + * We cannot do the below because we might be preempted and
  24158. + * then the preempter would see NULL and loop forever.
  24159. + */
  24160. + if (spin_trylock(&new->lock)) {
  24161. + WRITE_ONCE(timer->flags,
  24162. + (timer->flags & ~TIMER_BASEMASK) | new->cpu);
  24163. + spin_unlock(&old->lock);
  24164. + return new;
  24165. + }
  24166. + return old;
  24167. +}
  24168. +
  24169. +#else
  24170. +static inline struct tvec_base *switch_timer_base(struct timer_list *timer,
  24171. + struct tvec_base *old,
  24172. + struct tvec_base *new)
  24173. +{
  24174. + /* See the comment in lock_timer_base() */
  24175. + timer->flags |= TIMER_MIGRATING;
  24176. +
  24177. + spin_unlock(&old->lock);
  24178. + spin_lock(&new->lock);
  24179. + WRITE_ONCE(timer->flags,
  24180. + (timer->flags & ~TIMER_BASEMASK) | new->cpu);
  24181. + return new;
  24182. +}
  24183. +#endif
  24184. static inline int
  24185. __mod_timer(struct timer_list *timer, unsigned long expires,
  24186. @@ -807,16 +843,8 @@
  24187. * handler yet has not finished. This also guarantees that
  24188. * the timer is serialized wrt itself.
  24189. */
  24190. - if (likely(base->running_timer != timer)) {
  24191. - /* See the comment in lock_timer_base() */
  24192. - timer->flags |= TIMER_MIGRATING;
  24193. -
  24194. - spin_unlock(&base->lock);
  24195. - base = new_base;
  24196. - spin_lock(&base->lock);
  24197. - WRITE_ONCE(timer->flags,
  24198. - (timer->flags & ~TIMER_BASEMASK) | base->cpu);
  24199. - }
  24200. + if (likely(base->running_timer != timer))
  24201. + base = switch_timer_base(timer, base, new_base);
  24202. }
  24203. timer->expires = expires;
  24204. @@ -1006,6 +1034,33 @@
  24205. }
  24206. EXPORT_SYMBOL_GPL(add_timer_on);
  24207. +#ifdef CONFIG_PREEMPT_RT_FULL
  24208. +/*
  24209. + * Wait for a running timer
  24210. + */
  24211. +static void wait_for_running_timer(struct timer_list *timer)
  24212. +{
  24213. + struct tvec_base *base;
  24214. + u32 tf = timer->flags;
  24215. +
  24216. + if (tf & TIMER_MIGRATING)
  24217. + return;
  24218. +
  24219. + base = per_cpu_ptr(&tvec_bases, tf & TIMER_CPUMASK);
  24220. + wait_event(base->wait_for_running_timer,
  24221. + base->running_timer != timer);
  24222. +}
  24223. +
  24224. +# define wakeup_timer_waiters(b) wake_up_all(&(b)->wait_for_running_timer)
  24225. +#else
  24226. +static inline void wait_for_running_timer(struct timer_list *timer)
  24227. +{
  24228. + cpu_relax();
  24229. +}
  24230. +
  24231. +# define wakeup_timer_waiters(b) do { } while (0)
  24232. +#endif
  24233. +
  24234. /**
  24235. * del_timer - deactive a timer.
  24236. * @timer: the timer to be deactivated
  24237. @@ -1063,7 +1118,7 @@
  24238. }
  24239. EXPORT_SYMBOL(try_to_del_timer_sync);
  24240. -#ifdef CONFIG_SMP
  24241. +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
  24242. /**
  24243. * del_timer_sync - deactivate a timer and wait for the handler to finish.
  24244. * @timer: the timer to be deactivated
  24245. @@ -1123,7 +1178,7 @@
  24246. int ret = try_to_del_timer_sync(timer);
  24247. if (ret >= 0)
  24248. return ret;
  24249. - cpu_relax();
  24250. + wait_for_running_timer(timer);
  24251. }
  24252. }
  24253. EXPORT_SYMBOL(del_timer_sync);
  24254. @@ -1248,16 +1303,18 @@
  24255. if (irqsafe) {
  24256. spin_unlock(&base->lock);
  24257. call_timer_fn(timer, fn, data);
  24258. + base->running_timer = NULL;
  24259. spin_lock(&base->lock);
  24260. } else {
  24261. spin_unlock_irq(&base->lock);
  24262. call_timer_fn(timer, fn, data);
  24263. + base->running_timer = NULL;
  24264. spin_lock_irq(&base->lock);
  24265. }
  24266. }
  24267. }
  24268. - base->running_timer = NULL;
  24269. spin_unlock_irq(&base->lock);
  24270. + wakeup_timer_waiters(base);
  24271. }
  24272. #ifdef CONFIG_NO_HZ_COMMON
  24273. @@ -1390,6 +1447,14 @@
  24274. if (cpu_is_offline(smp_processor_id()))
  24275. return expires;
  24276. +#ifdef CONFIG_PREEMPT_RT_FULL
  24277. + /*
  24278. + * On PREEMPT_RT we cannot sleep here. As a result we can't take
  24279. + * the base lock to check when the next timer is pending and so
  24280. + * we assume the next jiffy.
  24281. + */
  24282. + return basem + TICK_NSEC;
  24283. +#endif
  24284. spin_lock(&base->lock);
  24285. if (base->active_timers) {
  24286. if (time_before_eq(base->next_timer, base->timer_jiffies))
  24287. @@ -1416,13 +1481,13 @@
  24288. /* Note: this timer irq context must be accounted for as well. */
  24289. account_process_tick(p, user_tick);
  24290. + scheduler_tick();
  24291. run_local_timers();
  24292. rcu_check_callbacks(user_tick);
  24293. -#ifdef CONFIG_IRQ_WORK
  24294. +#if defined(CONFIG_IRQ_WORK)
  24295. if (in_irq())
  24296. irq_work_tick();
  24297. #endif
  24298. - scheduler_tick();
  24299. run_posix_cpu_timers(p);
  24300. }
  24301. @@ -1433,6 +1498,8 @@
  24302. {
  24303. struct tvec_base *base = this_cpu_ptr(&tvec_bases);
  24304. + irq_work_tick_soft();
  24305. +
  24306. if (time_after_eq(jiffies, base->timer_jiffies))
  24307. __run_timers(base);
  24308. }
  24309. @@ -1589,7 +1656,7 @@
  24310. BUG_ON(cpu_online(cpu));
  24311. old_base = per_cpu_ptr(&tvec_bases, cpu);
  24312. - new_base = get_cpu_ptr(&tvec_bases);
  24313. + new_base = get_local_ptr(&tvec_bases);
  24314. /*
  24315. * The caller is globally serialized and nobody else
  24316. * takes two locks at once, deadlock is not possible.
  24317. @@ -1613,7 +1680,7 @@
  24318. spin_unlock(&old_base->lock);
  24319. spin_unlock_irq(&new_base->lock);
  24320. - put_cpu_ptr(&tvec_bases);
  24321. + put_local_ptr(&tvec_bases);
  24322. }
  24323. static int timer_cpu_notify(struct notifier_block *self,
  24324. @@ -1645,6 +1712,9 @@
  24325. base->cpu = cpu;
  24326. spin_lock_init(&base->lock);
  24327. +#ifdef CONFIG_PREEMPT_RT_FULL
  24328. + init_waitqueue_head(&base->wait_for_running_timer);
  24329. +#endif
  24330. base->timer_jiffies = jiffies;
  24331. base->next_timer = base->timer_jiffies;
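Editor's note on the timer.c hunks above: on PREEMPT_RT, del_timer_sync() can no longer busy-wait with cpu_relax() for a running callback (the softirq thread it would be waiting on may be preempted), so the patch sleeps on a per-base waitqueue instead, and the softirq clears base->running_timer before re-taking base->lock and wakes all waiters after dropping it. The condensed sketch below only restates that pairing with hypothetical helper names; the real code is in del_timer_sync() and __run_timers() above:

/* Illustrative condensation of the wait/wake pairing added above. */
static void rt_wait_for_timer_callback(struct tvec_base *base,
				       struct timer_list *timer)
{
	/* deleter side: sleep until the callback has finished */
	wait_event(base->wait_for_running_timer,
		   base->running_timer != timer);
}

static void rt_finish_timer_callback(struct tvec_base *base)
{
	/* softirq side: cleared before base->lock is re-taken, so a
	 * waiter that re-checks the condition observes the progress */
	base->running_timer = NULL;
	wake_up_all(&base->wait_for_running_timer);
}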
  24332. diff -Nur linux-4.4.62.orig/kernel/trace/Kconfig linux-4.4.62/kernel/trace/Kconfig
  24333. --- linux-4.4.62.orig/kernel/trace/Kconfig 2017-04-18 07:15:37.000000000 +0200
  24334. +++ linux-4.4.62/kernel/trace/Kconfig 2017-04-18 17:38:08.226650485 +0200
  24335. @@ -187,6 +187,24 @@
  24336. enabled. This option and the preempt-off timing option can be
  24337. used together or separately.)
  24338. +config INTERRUPT_OFF_HIST
  24339. + bool "Interrupts-off Latency Histogram"
  24340. + depends on IRQSOFF_TRACER
  24341. + help
  24342. + This option generates continuously updated histograms (one per cpu)
  24343. + of the duration of time periods with interrupts disabled. The
  24344. + histograms are disabled by default. To enable them, write a non-zero
  24345. + number to
  24346. +
  24347. + /sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff
  24348. +
  24349. + If PREEMPT_OFF_HIST is also selected, additional histograms (one
  24350. + per cpu) are generated that accumulate the duration of time periods
  24351. + when both interrupts and preemption are disabled. The histogram data
  24352. + will be located in the debug file system at
  24353. +
  24354. + /sys/kernel/debug/tracing/latency_hist/irqsoff
  24355. +
  24356. config PREEMPT_TRACER
  24357. bool "Preemption-off Latency Tracer"
  24358. default n
  24359. @@ -211,6 +229,24 @@
  24360. enabled. This option and the irqs-off timing option can be
  24361. used together or separately.)
  24362. +config PREEMPT_OFF_HIST
  24363. + bool "Preemption-off Latency Histogram"
  24364. + depends on PREEMPT_TRACER
  24365. + help
  24366. + This option generates continuously updated histograms (one per cpu)
  24367. + of the duration of time periods with preemption disabled. The
  24368. + histograms are disabled by default. To enable them, write a non-zero
  24369. + number to
  24370. +
  24371. + /sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff
  24372. +
  24373. + If INTERRUPT_OFF_HIST is also selected, additional histograms (one
  24374. + per cpu) are generated that accumulate the duration of time periods
  24375. + when both interrupts and preemption are disabled. The histogram data
  24376. + will be located in the debug file system at
  24377. +
  24378. + /sys/kernel/debug/tracing/latency_hist/preemptoff
  24379. +
  24380. config SCHED_TRACER
  24381. bool "Scheduling Latency Tracer"
  24382. select GENERIC_TRACER
  24383. @@ -221,6 +257,74 @@
  24384. This tracer tracks the latency of the highest priority task
  24385. to be scheduled in, starting from the point it has woken up.
  24386. +config WAKEUP_LATENCY_HIST
  24387. + bool "Scheduling Latency Histogram"
  24388. + depends on SCHED_TRACER
  24389. + help
  24390. + This option generates continuously updated histograms (one per cpu)
  24391. + of the scheduling latency of the highest priority task.
  24392. + The histograms are disabled by default. To enable them, write a
  24393. + non-zero number to
  24394. +
  24395. + /sys/kernel/debug/tracing/latency_hist/enable/wakeup
  24396. +
  24397. + Two different algorithms are used, one to determine the latency of
  24398. + processes that exclusively use the highest priority of the system and
  24399. + another one to determine the latency of processes that share the
  24400. + highest system priority with other processes. The former is used to
  24401. + improve hardware and system software, the latter to optimize the
  24402. + priority design of a given system. The histogram data will be
  24403. + located in the debug file system at
  24404. +
  24405. + /sys/kernel/debug/tracing/latency_hist/wakeup
  24406. +
  24407. + and
  24408. +
  24409. + /sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio
  24410. +
  24411. + If both Scheduling Latency Histogram and Missed Timer Offsets
  24412. + Histogram are selected, additional histogram data will be collected
  24413. + that contain, in addition to the wakeup latency, the timer latency, in
  24414. + case the wakeup was triggered by an expired timer. These histograms
  24415. + are available in the
  24416. +
  24417. + /sys/kernel/debug/tracing/latency_hist/timerandwakeup
  24418. +
  24419. + directory. They reflect the apparent interrupt and scheduling latency
24420. + and are best suited to determine the worst-case latency of a given
  24421. + system. To enable these histograms, write a non-zero number to
  24422. +
  24423. + /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup
  24424. +
  24425. +config MISSED_TIMER_OFFSETS_HIST
  24426. + depends on HIGH_RES_TIMERS
  24427. + select GENERIC_TRACER
  24428. + bool "Missed Timer Offsets Histogram"
  24429. + help
  24430. + Generate a histogram of missed timer offsets in microseconds. The
  24431. + histograms are disabled by default. To enable them, write a non-zero
  24432. + number to
  24433. +
  24434. + /sys/kernel/debug/tracing/latency_hist/enable/missed_timer_offsets
  24435. +
  24436. + The histogram data will be located in the debug file system at
  24437. +
  24438. + /sys/kernel/debug/tracing/latency_hist/missed_timer_offsets
  24439. +
  24440. + If both Scheduling Latency Histogram and Missed Timer Offsets
  24441. + Histogram are selected, additional histogram data will be collected
  24442. + that contain, in addition to the wakeup latency, the timer latency, in
  24443. + case the wakeup was triggered by an expired timer. These histograms
  24444. + are available in the
  24445. +
  24446. + /sys/kernel/debug/tracing/latency_hist/timerandwakeup
  24447. +
  24448. + directory. They reflect the apparent interrupt and scheduling latency
24449. + and are best suited to determine the worst-case latency of a given
  24450. + system. To enable these histograms, write a non-zero number to
  24451. +
  24452. + /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup
  24453. +
  24454. config ENABLE_DEFAULT_TRACERS
  24455. bool "Trace process context switches and events"
  24456. depends on !GENERIC_TRACER
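The help texts above describe a purely file-based interface under /sys/kernel/debug/tracing/latency_hist/. As a hypothetical userspace usage example (assuming debugfs is mounted at its default location and the per-CPU files follow the CPU%d naming used by latency_hist.c below), enabling the wakeup histogram and dumping CPU 0's data could look like this:

/* Hypothetical userspace helper, not part of the patch. */
#include <stdio.h>

int main(void)
{
	char line[256];
	FILE *f;

	f = fopen("/sys/kernel/debug/tracing/latency_hist/enable/wakeup", "w");
	if (!f)
		return 1;
	fputs("1\n", f);		/* any non-zero number enables the histogram */
	fclose(f);

	f = fopen("/sys/kernel/debug/tracing/latency_hist/wakeup/CPU0", "r");
	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* "#Minimum latency: ..." header, then usecs vs. samples */
	fclose(f);
	return 0;
}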
  24457. diff -Nur linux-4.4.62.orig/kernel/trace/latency_hist.c linux-4.4.62/kernel/trace/latency_hist.c
  24458. --- linux-4.4.62.orig/kernel/trace/latency_hist.c 1970-01-01 01:00:00.000000000 +0100
  24459. +++ linux-4.4.62/kernel/trace/latency_hist.c 2017-04-18 17:38:08.226650485 +0200
  24460. @@ -0,0 +1,1178 @@
  24461. +/*
  24462. + * kernel/trace/latency_hist.c
  24463. + *
  24464. + * Add support for histograms of preemption-off latency and
24465. + * interrupt-off latency and wakeup latency; it depends on
  24466. + * Real-Time Preemption Support.
  24467. + *
  24468. + * Copyright (C) 2005 MontaVista Software, Inc.
  24469. + * Yi Yang <yyang@ch.mvista.com>
  24470. + *
  24471. + * Converted to work with the new latency tracer.
  24472. + * Copyright (C) 2008 Red Hat, Inc.
  24473. + * Steven Rostedt <srostedt@redhat.com>
  24474. + *
  24475. + */
  24476. +#include <linux/module.h>
  24477. +#include <linux/debugfs.h>
  24478. +#include <linux/seq_file.h>
  24479. +#include <linux/percpu.h>
  24480. +#include <linux/kallsyms.h>
  24481. +#include <linux/uaccess.h>
  24482. +#include <linux/sched.h>
  24483. +#include <linux/sched/rt.h>
  24484. +#include <linux/slab.h>
  24485. +#include <linux/atomic.h>
  24486. +#include <asm/div64.h>
  24487. +
  24488. +#include "trace.h"
  24489. +#include <trace/events/sched.h>
  24490. +
  24491. +#define NSECS_PER_USECS 1000L
  24492. +
  24493. +#define CREATE_TRACE_POINTS
  24494. +#include <trace/events/hist.h>
  24495. +
  24496. +enum {
  24497. + IRQSOFF_LATENCY = 0,
  24498. + PREEMPTOFF_LATENCY,
  24499. + PREEMPTIRQSOFF_LATENCY,
  24500. + WAKEUP_LATENCY,
  24501. + WAKEUP_LATENCY_SHAREDPRIO,
  24502. + MISSED_TIMER_OFFSETS,
  24503. + TIMERANDWAKEUP_LATENCY,
  24504. + MAX_LATENCY_TYPE,
  24505. +};
  24506. +
  24507. +#define MAX_ENTRY_NUM 10240
  24508. +
  24509. +struct hist_data {
  24510. + atomic_t hist_mode; /* 0 log, 1 don't log */
  24511. + long offset; /* set it to MAX_ENTRY_NUM/2 for a bipolar scale */
  24512. + long min_lat;
  24513. + long max_lat;
  24514. + unsigned long long below_hist_bound_samples;
  24515. + unsigned long long above_hist_bound_samples;
  24516. + long long accumulate_lat;
  24517. + unsigned long long total_samples;
  24518. + unsigned long long hist_array[MAX_ENTRY_NUM];
  24519. +};
  24520. +
  24521. +struct enable_data {
  24522. + int latency_type;
  24523. + int enabled;
  24524. +};
  24525. +
  24526. +static char *latency_hist_dir_root = "latency_hist";
  24527. +
  24528. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  24529. +static DEFINE_PER_CPU(struct hist_data, irqsoff_hist);
  24530. +static char *irqsoff_hist_dir = "irqsoff";
  24531. +static DEFINE_PER_CPU(cycles_t, hist_irqsoff_start);
  24532. +static DEFINE_PER_CPU(int, hist_irqsoff_counting);
  24533. +#endif
  24534. +
  24535. +#ifdef CONFIG_PREEMPT_OFF_HIST
  24536. +static DEFINE_PER_CPU(struct hist_data, preemptoff_hist);
  24537. +static char *preemptoff_hist_dir = "preemptoff";
  24538. +static DEFINE_PER_CPU(cycles_t, hist_preemptoff_start);
  24539. +static DEFINE_PER_CPU(int, hist_preemptoff_counting);
  24540. +#endif
  24541. +
  24542. +#if defined(CONFIG_PREEMPT_OFF_HIST) && defined(CONFIG_INTERRUPT_OFF_HIST)
  24543. +static DEFINE_PER_CPU(struct hist_data, preemptirqsoff_hist);
  24544. +static char *preemptirqsoff_hist_dir = "preemptirqsoff";
  24545. +static DEFINE_PER_CPU(cycles_t, hist_preemptirqsoff_start);
  24546. +static DEFINE_PER_CPU(int, hist_preemptirqsoff_counting);
  24547. +#endif
  24548. +
  24549. +#if defined(CONFIG_PREEMPT_OFF_HIST) || defined(CONFIG_INTERRUPT_OFF_HIST)
  24550. +static notrace void probe_preemptirqsoff_hist(void *v, int reason, int start);
  24551. +static struct enable_data preemptirqsoff_enabled_data = {
  24552. + .latency_type = PREEMPTIRQSOFF_LATENCY,
  24553. + .enabled = 0,
  24554. +};
  24555. +#endif
  24556. +
  24557. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  24558. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  24559. +struct maxlatproc_data {
  24560. + char comm[FIELD_SIZEOF(struct task_struct, comm)];
  24561. + char current_comm[FIELD_SIZEOF(struct task_struct, comm)];
  24562. + int pid;
  24563. + int current_pid;
  24564. + int prio;
  24565. + int current_prio;
  24566. + long latency;
  24567. + long timeroffset;
  24568. + cycle_t timestamp;
  24569. +};
  24570. +#endif
  24571. +
  24572. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  24573. +static DEFINE_PER_CPU(struct hist_data, wakeup_latency_hist);
  24574. +static DEFINE_PER_CPU(struct hist_data, wakeup_latency_hist_sharedprio);
  24575. +static char *wakeup_latency_hist_dir = "wakeup";
  24576. +static char *wakeup_latency_hist_dir_sharedprio = "sharedprio";
  24577. +static notrace void probe_wakeup_latency_hist_start(void *v,
  24578. + struct task_struct *p);
  24579. +static notrace void probe_wakeup_latency_hist_stop(void *v,
  24580. + bool preempt, struct task_struct *prev, struct task_struct *next);
  24581. +static notrace void probe_sched_migrate_task(void *,
  24582. + struct task_struct *task, int cpu);
  24583. +static struct enable_data wakeup_latency_enabled_data = {
  24584. + .latency_type = WAKEUP_LATENCY,
  24585. + .enabled = 0,
  24586. +};
  24587. +static DEFINE_PER_CPU(struct maxlatproc_data, wakeup_maxlatproc);
  24588. +static DEFINE_PER_CPU(struct maxlatproc_data, wakeup_maxlatproc_sharedprio);
  24589. +static DEFINE_PER_CPU(struct task_struct *, wakeup_task);
  24590. +static DEFINE_PER_CPU(int, wakeup_sharedprio);
  24591. +static unsigned long wakeup_pid;
  24592. +#endif
  24593. +
  24594. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  24595. +static DEFINE_PER_CPU(struct hist_data, missed_timer_offsets);
  24596. +static char *missed_timer_offsets_dir = "missed_timer_offsets";
  24597. +static notrace void probe_hrtimer_interrupt(void *v, int cpu,
  24598. + long long offset, struct task_struct *curr, struct task_struct *task);
  24599. +static struct enable_data missed_timer_offsets_enabled_data = {
  24600. + .latency_type = MISSED_TIMER_OFFSETS,
  24601. + .enabled = 0,
  24602. +};
  24603. +static DEFINE_PER_CPU(struct maxlatproc_data, missed_timer_offsets_maxlatproc);
  24604. +static unsigned long missed_timer_offsets_pid;
  24605. +#endif
  24606. +
  24607. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
  24608. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  24609. +static DEFINE_PER_CPU(struct hist_data, timerandwakeup_latency_hist);
  24610. +static char *timerandwakeup_latency_hist_dir = "timerandwakeup";
  24611. +static struct enable_data timerandwakeup_enabled_data = {
  24612. + .latency_type = TIMERANDWAKEUP_LATENCY,
  24613. + .enabled = 0,
  24614. +};
  24615. +static DEFINE_PER_CPU(struct maxlatproc_data, timerandwakeup_maxlatproc);
  24616. +#endif
  24617. +
  24618. +void notrace latency_hist(int latency_type, int cpu, long latency,
  24619. + long timeroffset, cycle_t stop,
  24620. + struct task_struct *p)
  24621. +{
  24622. + struct hist_data *my_hist;
  24623. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  24624. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  24625. + struct maxlatproc_data *mp = NULL;
  24626. +#endif
  24627. +
  24628. + if (!cpu_possible(cpu) || latency_type < 0 ||
  24629. + latency_type >= MAX_LATENCY_TYPE)
  24630. + return;
  24631. +
  24632. + switch (latency_type) {
  24633. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  24634. + case IRQSOFF_LATENCY:
  24635. + my_hist = &per_cpu(irqsoff_hist, cpu);
  24636. + break;
  24637. +#endif
  24638. +#ifdef CONFIG_PREEMPT_OFF_HIST
  24639. + case PREEMPTOFF_LATENCY:
  24640. + my_hist = &per_cpu(preemptoff_hist, cpu);
  24641. + break;
  24642. +#endif
  24643. +#if defined(CONFIG_PREEMPT_OFF_HIST) && defined(CONFIG_INTERRUPT_OFF_HIST)
  24644. + case PREEMPTIRQSOFF_LATENCY:
  24645. + my_hist = &per_cpu(preemptirqsoff_hist, cpu);
  24646. + break;
  24647. +#endif
  24648. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  24649. + case WAKEUP_LATENCY:
  24650. + my_hist = &per_cpu(wakeup_latency_hist, cpu);
  24651. + mp = &per_cpu(wakeup_maxlatproc, cpu);
  24652. + break;
  24653. + case WAKEUP_LATENCY_SHAREDPRIO:
  24654. + my_hist = &per_cpu(wakeup_latency_hist_sharedprio, cpu);
  24655. + mp = &per_cpu(wakeup_maxlatproc_sharedprio, cpu);
  24656. + break;
  24657. +#endif
  24658. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  24659. + case MISSED_TIMER_OFFSETS:
  24660. + my_hist = &per_cpu(missed_timer_offsets, cpu);
  24661. + mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu);
  24662. + break;
  24663. +#endif
  24664. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
  24665. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  24666. + case TIMERANDWAKEUP_LATENCY:
  24667. + my_hist = &per_cpu(timerandwakeup_latency_hist, cpu);
  24668. + mp = &per_cpu(timerandwakeup_maxlatproc, cpu);
  24669. + break;
  24670. +#endif
  24671. +
  24672. + default:
  24673. + return;
  24674. + }
  24675. +
  24676. + latency += my_hist->offset;
  24677. +
  24678. + if (atomic_read(&my_hist->hist_mode) == 0)
  24679. + return;
  24680. +
  24681. + if (latency < 0 || latency >= MAX_ENTRY_NUM) {
  24682. + if (latency < 0)
  24683. + my_hist->below_hist_bound_samples++;
  24684. + else
  24685. + my_hist->above_hist_bound_samples++;
  24686. + } else
  24687. + my_hist->hist_array[latency]++;
  24688. +
  24689. + if (unlikely(latency > my_hist->max_lat ||
  24690. + my_hist->min_lat == LONG_MAX)) {
  24691. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  24692. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  24693. + if (latency_type == WAKEUP_LATENCY ||
  24694. + latency_type == WAKEUP_LATENCY_SHAREDPRIO ||
  24695. + latency_type == MISSED_TIMER_OFFSETS ||
  24696. + latency_type == TIMERANDWAKEUP_LATENCY) {
  24697. + strncpy(mp->comm, p->comm, sizeof(mp->comm));
  24698. + strncpy(mp->current_comm, current->comm,
  24699. + sizeof(mp->current_comm));
  24700. + mp->pid = task_pid_nr(p);
  24701. + mp->current_pid = task_pid_nr(current);
  24702. + mp->prio = p->prio;
  24703. + mp->current_prio = current->prio;
  24704. + mp->latency = latency;
  24705. + mp->timeroffset = timeroffset;
  24706. + mp->timestamp = stop;
  24707. + }
  24708. +#endif
  24709. + my_hist->max_lat = latency;
  24710. + }
  24711. + if (unlikely(latency < my_hist->min_lat))
  24712. + my_hist->min_lat = latency;
  24713. + my_hist->total_samples++;
  24714. + my_hist->accumulate_lat += latency;
  24715. +}
  24716. +
  24717. +static void *l_start(struct seq_file *m, loff_t *pos)
  24718. +{
  24719. + loff_t *index_ptr = NULL;
  24720. + loff_t index = *pos;
  24721. + struct hist_data *my_hist = m->private;
  24722. +
  24723. + if (index == 0) {
  24724. + char minstr[32], avgstr[32], maxstr[32];
  24725. +
  24726. + atomic_dec(&my_hist->hist_mode);
  24727. +
  24728. + if (likely(my_hist->total_samples)) {
  24729. + long avg = (long) div64_s64(my_hist->accumulate_lat,
  24730. + my_hist->total_samples);
  24731. + snprintf(minstr, sizeof(minstr), "%ld",
  24732. + my_hist->min_lat - my_hist->offset);
  24733. + snprintf(avgstr, sizeof(avgstr), "%ld",
  24734. + avg - my_hist->offset);
  24735. + snprintf(maxstr, sizeof(maxstr), "%ld",
  24736. + my_hist->max_lat - my_hist->offset);
  24737. + } else {
  24738. + strcpy(minstr, "<undef>");
  24739. + strcpy(avgstr, minstr);
  24740. + strcpy(maxstr, minstr);
  24741. + }
  24742. +
  24743. + seq_printf(m, "#Minimum latency: %s microseconds\n"
  24744. + "#Average latency: %s microseconds\n"
  24745. + "#Maximum latency: %s microseconds\n"
  24746. + "#Total samples: %llu\n"
  24747. + "#There are %llu samples lower than %ld"
  24748. + " microseconds.\n"
  24749. + "#There are %llu samples greater or equal"
  24750. + " than %ld microseconds.\n"
  24751. + "#usecs\t%16s\n",
  24752. + minstr, avgstr, maxstr,
  24753. + my_hist->total_samples,
  24754. + my_hist->below_hist_bound_samples,
  24755. + -my_hist->offset,
  24756. + my_hist->above_hist_bound_samples,
  24757. + MAX_ENTRY_NUM - my_hist->offset,
  24758. + "samples");
  24759. + }
  24760. + if (index < MAX_ENTRY_NUM) {
  24761. + index_ptr = kmalloc(sizeof(loff_t), GFP_KERNEL);
  24762. + if (index_ptr)
  24763. + *index_ptr = index;
  24764. + }
  24765. +
  24766. + return index_ptr;
  24767. +}
  24768. +
  24769. +static void *l_next(struct seq_file *m, void *p, loff_t *pos)
  24770. +{
  24771. + loff_t *index_ptr = p;
  24772. + struct hist_data *my_hist = m->private;
  24773. +
  24774. + if (++*pos >= MAX_ENTRY_NUM) {
  24775. + atomic_inc(&my_hist->hist_mode);
  24776. + return NULL;
  24777. + }
  24778. + *index_ptr = *pos;
  24779. + return index_ptr;
  24780. +}
  24781. +
  24782. +static void l_stop(struct seq_file *m, void *p)
  24783. +{
  24784. + kfree(p);
  24785. +}
  24786. +
  24787. +static int l_show(struct seq_file *m, void *p)
  24788. +{
  24789. + int index = *(loff_t *) p;
  24790. + struct hist_data *my_hist = m->private;
  24791. +
  24792. + seq_printf(m, "%6ld\t%16llu\n", index - my_hist->offset,
  24793. + my_hist->hist_array[index]);
  24794. + return 0;
  24795. +}
  24796. +
  24797. +static const struct seq_operations latency_hist_seq_op = {
  24798. + .start = l_start,
  24799. + .next = l_next,
  24800. + .stop = l_stop,
  24801. + .show = l_show
  24802. +};
  24803. +
  24804. +static int latency_hist_open(struct inode *inode, struct file *file)
  24805. +{
  24806. + int ret;
  24807. +
  24808. + ret = seq_open(file, &latency_hist_seq_op);
  24809. + if (!ret) {
  24810. + struct seq_file *seq = file->private_data;
  24811. + seq->private = inode->i_private;
  24812. + }
  24813. + return ret;
  24814. +}
  24815. +
  24816. +static const struct file_operations latency_hist_fops = {
  24817. + .open = latency_hist_open,
  24818. + .read = seq_read,
  24819. + .llseek = seq_lseek,
  24820. + .release = seq_release,
  24821. +};
  24822. +
  24823. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  24824. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  24825. +static void clear_maxlatprocdata(struct maxlatproc_data *mp)
  24826. +{
  24827. + mp->comm[0] = mp->current_comm[0] = '\0';
  24828. + mp->prio = mp->current_prio = mp->pid = mp->current_pid =
  24829. + mp->latency = mp->timeroffset = -1;
  24830. + mp->timestamp = 0;
  24831. +}
  24832. +#endif
  24833. +
  24834. +static void hist_reset(struct hist_data *hist)
  24835. +{
  24836. + atomic_dec(&hist->hist_mode);
  24837. +
  24838. + memset(hist->hist_array, 0, sizeof(hist->hist_array));
  24839. + hist->below_hist_bound_samples = 0ULL;
  24840. + hist->above_hist_bound_samples = 0ULL;
  24841. + hist->min_lat = LONG_MAX;
  24842. + hist->max_lat = LONG_MIN;
  24843. + hist->total_samples = 0ULL;
  24844. + hist->accumulate_lat = 0LL;
  24845. +
  24846. + atomic_inc(&hist->hist_mode);
  24847. +}
  24848. +
  24849. +static ssize_t
  24850. +latency_hist_reset(struct file *file, const char __user *a,
  24851. + size_t size, loff_t *off)
  24852. +{
  24853. + int cpu;
  24854. + struct hist_data *hist = NULL;
  24855. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  24856. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  24857. + struct maxlatproc_data *mp = NULL;
  24858. +#endif
  24859. + off_t latency_type = (off_t) file->private_data;
  24860. +
  24861. + for_each_online_cpu(cpu) {
  24862. +
  24863. + switch (latency_type) {
  24864. +#ifdef CONFIG_PREEMPT_OFF_HIST
  24865. + case PREEMPTOFF_LATENCY:
  24866. + hist = &per_cpu(preemptoff_hist, cpu);
  24867. + break;
  24868. +#endif
  24869. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  24870. + case IRQSOFF_LATENCY:
  24871. + hist = &per_cpu(irqsoff_hist, cpu);
  24872. + break;
  24873. +#endif
  24874. +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
  24875. + case PREEMPTIRQSOFF_LATENCY:
  24876. + hist = &per_cpu(preemptirqsoff_hist, cpu);
  24877. + break;
  24878. +#endif
  24879. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  24880. + case WAKEUP_LATENCY:
  24881. + hist = &per_cpu(wakeup_latency_hist, cpu);
  24882. + mp = &per_cpu(wakeup_maxlatproc, cpu);
  24883. + break;
  24884. + case WAKEUP_LATENCY_SHAREDPRIO:
  24885. + hist = &per_cpu(wakeup_latency_hist_sharedprio, cpu);
  24886. + mp = &per_cpu(wakeup_maxlatproc_sharedprio, cpu);
  24887. + break;
  24888. +#endif
  24889. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  24890. + case MISSED_TIMER_OFFSETS:
  24891. + hist = &per_cpu(missed_timer_offsets, cpu);
  24892. + mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu);
  24893. + break;
  24894. +#endif
  24895. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
  24896. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  24897. + case TIMERANDWAKEUP_LATENCY:
  24898. + hist = &per_cpu(timerandwakeup_latency_hist, cpu);
  24899. + mp = &per_cpu(timerandwakeup_maxlatproc, cpu);
  24900. + break;
  24901. +#endif
  24902. + }
  24903. +
  24904. + hist_reset(hist);
  24905. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  24906. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  24907. + if (latency_type == WAKEUP_LATENCY ||
  24908. + latency_type == WAKEUP_LATENCY_SHAREDPRIO ||
  24909. + latency_type == MISSED_TIMER_OFFSETS ||
  24910. + latency_type == TIMERANDWAKEUP_LATENCY)
  24911. + clear_maxlatprocdata(mp);
  24912. +#endif
  24913. + }
  24914. +
  24915. + return size;
  24916. +}
  24917. +
  24918. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  24919. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  24920. +static ssize_t
  24921. +show_pid(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
  24922. +{
  24923. + char buf[64];
  24924. + int r;
  24925. + unsigned long *this_pid = file->private_data;
  24926. +
  24927. + r = snprintf(buf, sizeof(buf), "%lu\n", *this_pid);
  24928. + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
  24929. +}
  24930. +
  24931. +static ssize_t do_pid(struct file *file, const char __user *ubuf,
  24932. + size_t cnt, loff_t *ppos)
  24933. +{
  24934. + char buf[64];
  24935. + unsigned long pid;
  24936. + unsigned long *this_pid = file->private_data;
  24937. +
  24938. + if (cnt >= sizeof(buf))
  24939. + return -EINVAL;
  24940. +
  24941. + if (copy_from_user(&buf, ubuf, cnt))
  24942. + return -EFAULT;
  24943. +
  24944. + buf[cnt] = '\0';
  24945. +
  24946. + if (kstrtoul(buf, 10, &pid))
  24947. + return -EINVAL;
  24948. +
  24949. + *this_pid = pid;
  24950. +
  24951. + return cnt;
  24952. +}
  24953. +#endif
  24954. +
  24955. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  24956. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  24957. +static ssize_t
  24958. +show_maxlatproc(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
  24959. +{
  24960. + int r;
  24961. + struct maxlatproc_data *mp = file->private_data;
  24962. + int strmaxlen = (TASK_COMM_LEN * 2) + (8 * 8);
  24963. + unsigned long long t;
  24964. + unsigned long usecs, secs;
  24965. + char *buf;
  24966. +
  24967. + if (mp->pid == -1 || mp->current_pid == -1) {
  24968. + buf = "(none)\n";
  24969. + return simple_read_from_buffer(ubuf, cnt, ppos, buf,
  24970. + strlen(buf));
  24971. + }
  24972. +
  24973. + buf = kmalloc(strmaxlen, GFP_KERNEL);
  24974. + if (buf == NULL)
  24975. + return -ENOMEM;
  24976. +
  24977. + t = ns2usecs(mp->timestamp);
  24978. + usecs = do_div(t, USEC_PER_SEC);
  24979. + secs = (unsigned long) t;
  24980. + r = snprintf(buf, strmaxlen,
  24981. + "%d %d %ld (%ld) %s <- %d %d %s %lu.%06lu\n", mp->pid,
  24982. + MAX_RT_PRIO-1 - mp->prio, mp->latency, mp->timeroffset, mp->comm,
  24983. + mp->current_pid, MAX_RT_PRIO-1 - mp->current_prio, mp->current_comm,
  24984. + secs, usecs);
  24985. + r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
  24986. + kfree(buf);
  24987. + return r;
  24988. +}
  24989. +#endif
  24990. +
  24991. +static ssize_t
  24992. +show_enable(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
  24993. +{
  24994. + char buf[64];
  24995. + struct enable_data *ed = file->private_data;
  24996. + int r;
  24997. +
  24998. + r = snprintf(buf, sizeof(buf), "%d\n", ed->enabled);
  24999. + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
  25000. +}
  25001. +
  25002. +static ssize_t
  25003. +do_enable(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos)
  25004. +{
  25005. + char buf[64];
  25006. + long enable;
  25007. + struct enable_data *ed = file->private_data;
  25008. +
  25009. + if (cnt >= sizeof(buf))
  25010. + return -EINVAL;
  25011. +
  25012. + if (copy_from_user(&buf, ubuf, cnt))
  25013. + return -EFAULT;
  25014. +
  25015. + buf[cnt] = 0;
  25016. +
  25017. + if (kstrtoul(buf, 10, &enable))
  25018. + return -EINVAL;
  25019. +
  25020. + if ((enable && ed->enabled) || (!enable && !ed->enabled))
  25021. + return cnt;
  25022. +
  25023. + if (enable) {
  25024. + int ret;
  25025. +
  25026. + switch (ed->latency_type) {
  25027. +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
  25028. + case PREEMPTIRQSOFF_LATENCY:
  25029. + ret = register_trace_preemptirqsoff_hist(
  25030. + probe_preemptirqsoff_hist, NULL);
  25031. + if (ret) {
  25032. + pr_info("wakeup trace: Couldn't assign "
  25033. + "probe_preemptirqsoff_hist "
  25034. + "to trace_preemptirqsoff_hist\n");
  25035. + return ret;
  25036. + }
  25037. + break;
  25038. +#endif
  25039. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  25040. + case WAKEUP_LATENCY:
  25041. + ret = register_trace_sched_wakeup(
  25042. + probe_wakeup_latency_hist_start, NULL);
  25043. + if (ret) {
  25044. + pr_info("wakeup trace: Couldn't assign "
  25045. + "probe_wakeup_latency_hist_start "
  25046. + "to trace_sched_wakeup\n");
  25047. + return ret;
  25048. + }
  25049. + ret = register_trace_sched_wakeup_new(
  25050. + probe_wakeup_latency_hist_start, NULL);
  25051. + if (ret) {
  25052. + pr_info("wakeup trace: Couldn't assign "
  25053. + "probe_wakeup_latency_hist_start "
  25054. + "to trace_sched_wakeup_new\n");
  25055. + unregister_trace_sched_wakeup(
  25056. + probe_wakeup_latency_hist_start, NULL);
  25057. + return ret;
  25058. + }
  25059. + ret = register_trace_sched_switch(
  25060. + probe_wakeup_latency_hist_stop, NULL);
  25061. + if (ret) {
  25062. + pr_info("wakeup trace: Couldn't assign "
  25063. + "probe_wakeup_latency_hist_stop "
  25064. + "to trace_sched_switch\n");
  25065. + unregister_trace_sched_wakeup(
  25066. + probe_wakeup_latency_hist_start, NULL);
  25067. + unregister_trace_sched_wakeup_new(
  25068. + probe_wakeup_latency_hist_start, NULL);
  25069. + return ret;
  25070. + }
  25071. + ret = register_trace_sched_migrate_task(
  25072. + probe_sched_migrate_task, NULL);
  25073. + if (ret) {
  25074. + pr_info("wakeup trace: Couldn't assign "
  25075. + "probe_sched_migrate_task "
  25076. + "to trace_sched_migrate_task\n");
  25077. + unregister_trace_sched_wakeup(
  25078. + probe_wakeup_latency_hist_start, NULL);
  25079. + unregister_trace_sched_wakeup_new(
  25080. + probe_wakeup_latency_hist_start, NULL);
  25081. + unregister_trace_sched_switch(
  25082. + probe_wakeup_latency_hist_stop, NULL);
  25083. + return ret;
  25084. + }
  25085. + break;
  25086. +#endif
  25087. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  25088. + case MISSED_TIMER_OFFSETS:
  25089. + ret = register_trace_hrtimer_interrupt(
  25090. + probe_hrtimer_interrupt, NULL);
  25091. + if (ret) {
  25092. + pr_info("wakeup trace: Couldn't assign "
  25093. + "probe_hrtimer_interrupt "
  25094. + "to trace_hrtimer_interrupt\n");
  25095. + return ret;
  25096. + }
  25097. + break;
  25098. +#endif
  25099. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
  25100. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  25101. + case TIMERANDWAKEUP_LATENCY:
  25102. + if (!wakeup_latency_enabled_data.enabled ||
  25103. + !missed_timer_offsets_enabled_data.enabled)
  25104. + return -EINVAL;
  25105. + break;
  25106. +#endif
  25107. + default:
  25108. + break;
  25109. + }
  25110. + } else {
  25111. + switch (ed->latency_type) {
  25112. +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
  25113. + case PREEMPTIRQSOFF_LATENCY:
  25114. + {
  25115. + int cpu;
  25116. +
  25117. + unregister_trace_preemptirqsoff_hist(
  25118. + probe_preemptirqsoff_hist, NULL);
  25119. + for_each_online_cpu(cpu) {
  25120. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  25121. + per_cpu(hist_irqsoff_counting,
  25122. + cpu) = 0;
  25123. +#endif
  25124. +#ifdef CONFIG_PREEMPT_OFF_HIST
  25125. + per_cpu(hist_preemptoff_counting,
  25126. + cpu) = 0;
  25127. +#endif
  25128. +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
  25129. + per_cpu(hist_preemptirqsoff_counting,
  25130. + cpu) = 0;
  25131. +#endif
  25132. + }
  25133. + }
  25134. + break;
  25135. +#endif
  25136. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  25137. + case WAKEUP_LATENCY:
  25138. + {
  25139. + int cpu;
  25140. +
  25141. + unregister_trace_sched_wakeup(
  25142. + probe_wakeup_latency_hist_start, NULL);
  25143. + unregister_trace_sched_wakeup_new(
  25144. + probe_wakeup_latency_hist_start, NULL);
  25145. + unregister_trace_sched_switch(
  25146. + probe_wakeup_latency_hist_stop, NULL);
  25147. + unregister_trace_sched_migrate_task(
  25148. + probe_sched_migrate_task, NULL);
  25149. +
  25150. + for_each_online_cpu(cpu) {
  25151. + per_cpu(wakeup_task, cpu) = NULL;
  25152. + per_cpu(wakeup_sharedprio, cpu) = 0;
  25153. + }
  25154. + }
  25155. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  25156. + timerandwakeup_enabled_data.enabled = 0;
  25157. +#endif
  25158. + break;
  25159. +#endif
  25160. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  25161. + case MISSED_TIMER_OFFSETS:
  25162. + unregister_trace_hrtimer_interrupt(
  25163. + probe_hrtimer_interrupt, NULL);
  25164. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  25165. + timerandwakeup_enabled_data.enabled = 0;
  25166. +#endif
  25167. + break;
  25168. +#endif
  25169. + default:
  25170. + break;
  25171. + }
  25172. + }
  25173. + ed->enabled = enable;
  25174. + return cnt;
  25175. +}
  25176. +
  25177. +static const struct file_operations latency_hist_reset_fops = {
  25178. + .open = tracing_open_generic,
  25179. + .write = latency_hist_reset,
  25180. +};
  25181. +
  25182. +static const struct file_operations enable_fops = {
  25183. + .open = tracing_open_generic,
  25184. + .read = show_enable,
  25185. + .write = do_enable,
  25186. +};
  25187. +
  25188. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  25189. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  25190. +static const struct file_operations pid_fops = {
  25191. + .open = tracing_open_generic,
  25192. + .read = show_pid,
  25193. + .write = do_pid,
  25194. +};
  25195. +
  25196. +static const struct file_operations maxlatproc_fops = {
  25197. + .open = tracing_open_generic,
  25198. + .read = show_maxlatproc,
  25199. +};
  25200. +#endif
  25201. +
  25202. +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
  25203. +static notrace void probe_preemptirqsoff_hist(void *v, int reason,
  25204. + int starthist)
  25205. +{
  25206. + int cpu = raw_smp_processor_id();
  25207. + int time_set = 0;
  25208. +
  25209. + if (starthist) {
  25210. + cycle_t uninitialized_var(start);
  25211. +
  25212. + if (!preempt_count() && !irqs_disabled())
  25213. + return;
  25214. +
  25215. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  25216. + if ((reason == IRQS_OFF || reason == TRACE_START) &&
  25217. + !per_cpu(hist_irqsoff_counting, cpu)) {
  25218. + per_cpu(hist_irqsoff_counting, cpu) = 1;
  25219. + start = ftrace_now(cpu);
  25220. + time_set++;
  25221. + per_cpu(hist_irqsoff_start, cpu) = start;
  25222. + }
  25223. +#endif
  25224. +
  25225. +#ifdef CONFIG_PREEMPT_OFF_HIST
  25226. + if ((reason == PREEMPT_OFF || reason == TRACE_START) &&
  25227. + !per_cpu(hist_preemptoff_counting, cpu)) {
  25228. + per_cpu(hist_preemptoff_counting, cpu) = 1;
  25229. + if (!(time_set++))
  25230. + start = ftrace_now(cpu);
  25231. + per_cpu(hist_preemptoff_start, cpu) = start;
  25232. + }
  25233. +#endif
  25234. +
  25235. +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
  25236. + if (per_cpu(hist_irqsoff_counting, cpu) &&
  25237. + per_cpu(hist_preemptoff_counting, cpu) &&
  25238. + !per_cpu(hist_preemptirqsoff_counting, cpu)) {
  25239. + per_cpu(hist_preemptirqsoff_counting, cpu) = 1;
  25240. + if (!time_set)
  25241. + start = ftrace_now(cpu);
  25242. + per_cpu(hist_preemptirqsoff_start, cpu) = start;
  25243. + }
  25244. +#endif
  25245. + } else {
  25246. + cycle_t uninitialized_var(stop);
  25247. +
  25248. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  25249. + if ((reason == IRQS_ON || reason == TRACE_STOP) &&
  25250. + per_cpu(hist_irqsoff_counting, cpu)) {
  25251. + cycle_t start = per_cpu(hist_irqsoff_start, cpu);
  25252. +
  25253. + stop = ftrace_now(cpu);
  25254. + time_set++;
  25255. + if (start) {
  25256. + long latency = ((long) (stop - start)) /
  25257. + NSECS_PER_USECS;
  25258. +
  25259. + latency_hist(IRQSOFF_LATENCY, cpu, latency, 0,
  25260. + stop, NULL);
  25261. + }
  25262. + per_cpu(hist_irqsoff_counting, cpu) = 0;
  25263. + }
  25264. +#endif
  25265. +
  25266. +#ifdef CONFIG_PREEMPT_OFF_HIST
  25267. + if ((reason == PREEMPT_ON || reason == TRACE_STOP) &&
  25268. + per_cpu(hist_preemptoff_counting, cpu)) {
  25269. + cycle_t start = per_cpu(hist_preemptoff_start, cpu);
  25270. +
  25271. + if (!(time_set++))
  25272. + stop = ftrace_now(cpu);
  25273. + if (start) {
  25274. + long latency = ((long) (stop - start)) /
  25275. + NSECS_PER_USECS;
  25276. +
  25277. + latency_hist(PREEMPTOFF_LATENCY, cpu, latency,
  25278. + 0, stop, NULL);
  25279. + }
  25280. + per_cpu(hist_preemptoff_counting, cpu) = 0;
  25281. + }
  25282. +#endif
  25283. +
  25284. +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
  25285. + if ((!per_cpu(hist_irqsoff_counting, cpu) ||
  25286. + !per_cpu(hist_preemptoff_counting, cpu)) &&
  25287. + per_cpu(hist_preemptirqsoff_counting, cpu)) {
  25288. + cycle_t start = per_cpu(hist_preemptirqsoff_start, cpu);
  25289. +
  25290. + if (!time_set)
  25291. + stop = ftrace_now(cpu);
  25292. + if (start) {
  25293. + long latency = ((long) (stop - start)) /
  25294. + NSECS_PER_USECS;
  25295. +
  25296. + latency_hist(PREEMPTIRQSOFF_LATENCY, cpu,
  25297. + latency, 0, stop, NULL);
  25298. + }
  25299. + per_cpu(hist_preemptirqsoff_counting, cpu) = 0;
  25300. + }
  25301. +#endif
  25302. + }
  25303. +}
  25304. +#endif
  25305. +
  25306. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  25307. +static DEFINE_RAW_SPINLOCK(wakeup_lock);
  25308. +static notrace void probe_sched_migrate_task(void *v, struct task_struct *task,
  25309. + int cpu)
  25310. +{
  25311. + int old_cpu = task_cpu(task);
  25312. +
  25313. + if (cpu != old_cpu) {
  25314. + unsigned long flags;
  25315. + struct task_struct *cpu_wakeup_task;
  25316. +
  25317. + raw_spin_lock_irqsave(&wakeup_lock, flags);
  25318. +
  25319. + cpu_wakeup_task = per_cpu(wakeup_task, old_cpu);
  25320. + if (task == cpu_wakeup_task) {
  25321. + put_task_struct(cpu_wakeup_task);
  25322. + per_cpu(wakeup_task, old_cpu) = NULL;
  25323. + cpu_wakeup_task = per_cpu(wakeup_task, cpu) = task;
  25324. + get_task_struct(cpu_wakeup_task);
  25325. + }
  25326. +
  25327. + raw_spin_unlock_irqrestore(&wakeup_lock, flags);
  25328. + }
  25329. +}
  25330. +
  25331. +static notrace void probe_wakeup_latency_hist_start(void *v,
  25332. + struct task_struct *p)
  25333. +{
  25334. + unsigned long flags;
  25335. + struct task_struct *curr = current;
  25336. + int cpu = task_cpu(p);
  25337. + struct task_struct *cpu_wakeup_task;
  25338. +
  25339. + raw_spin_lock_irqsave(&wakeup_lock, flags);
  25340. +
  25341. + cpu_wakeup_task = per_cpu(wakeup_task, cpu);
  25342. +
  25343. + if (wakeup_pid) {
  25344. + if ((cpu_wakeup_task && p->prio == cpu_wakeup_task->prio) ||
  25345. + p->prio == curr->prio)
  25346. + per_cpu(wakeup_sharedprio, cpu) = 1;
  25347. + if (likely(wakeup_pid != task_pid_nr(p)))
  25348. + goto out;
  25349. + } else {
  25350. + if (likely(!rt_task(p)) ||
  25351. + (cpu_wakeup_task && p->prio > cpu_wakeup_task->prio) ||
  25352. + p->prio > curr->prio)
  25353. + goto out;
  25354. + if ((cpu_wakeup_task && p->prio == cpu_wakeup_task->prio) ||
  25355. + p->prio == curr->prio)
  25356. + per_cpu(wakeup_sharedprio, cpu) = 1;
  25357. + }
  25358. +
  25359. + if (cpu_wakeup_task)
  25360. + put_task_struct(cpu_wakeup_task);
  25361. + cpu_wakeup_task = per_cpu(wakeup_task, cpu) = p;
  25362. + get_task_struct(cpu_wakeup_task);
  25363. + cpu_wakeup_task->preempt_timestamp_hist =
  25364. + ftrace_now(raw_smp_processor_id());
  25365. +out:
  25366. + raw_spin_unlock_irqrestore(&wakeup_lock, flags);
  25367. +}
  25368. +
  25369. +static notrace void probe_wakeup_latency_hist_stop(void *v,
  25370. + bool preempt, struct task_struct *prev, struct task_struct *next)
  25371. +{
  25372. + unsigned long flags;
  25373. + int cpu = task_cpu(next);
  25374. + long latency;
  25375. + cycle_t stop;
  25376. + struct task_struct *cpu_wakeup_task;
  25377. +
  25378. + raw_spin_lock_irqsave(&wakeup_lock, flags);
  25379. +
  25380. + cpu_wakeup_task = per_cpu(wakeup_task, cpu);
  25381. +
  25382. + if (cpu_wakeup_task == NULL)
  25383. + goto out;
  25384. +
  25385. + /* Already running? */
  25386. + if (unlikely(current == cpu_wakeup_task))
  25387. + goto out_reset;
  25388. +
  25389. + if (next != cpu_wakeup_task) {
  25390. + if (next->prio < cpu_wakeup_task->prio)
  25391. + goto out_reset;
  25392. +
  25393. + if (next->prio == cpu_wakeup_task->prio)
  25394. + per_cpu(wakeup_sharedprio, cpu) = 1;
  25395. +
  25396. + goto out;
  25397. + }
  25398. +
  25399. + if (current->prio == cpu_wakeup_task->prio)
  25400. + per_cpu(wakeup_sharedprio, cpu) = 1;
  25401. +
  25402. + /*
  25403. + * The task we are waiting for is about to be switched to.
  25404. + * Calculate latency and store it in histogram.
  25405. + */
  25406. + stop = ftrace_now(raw_smp_processor_id());
  25407. +
  25408. + latency = ((long) (stop - next->preempt_timestamp_hist)) /
  25409. + NSECS_PER_USECS;
  25410. +
  25411. + if (per_cpu(wakeup_sharedprio, cpu)) {
  25412. + latency_hist(WAKEUP_LATENCY_SHAREDPRIO, cpu, latency, 0, stop,
  25413. + next);
  25414. + per_cpu(wakeup_sharedprio, cpu) = 0;
  25415. + } else {
  25416. + latency_hist(WAKEUP_LATENCY, cpu, latency, 0, stop, next);
  25417. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  25418. + if (timerandwakeup_enabled_data.enabled) {
  25419. + latency_hist(TIMERANDWAKEUP_LATENCY, cpu,
  25420. + next->timer_offset + latency, next->timer_offset,
  25421. + stop, next);
  25422. + }
  25423. +#endif
  25424. + }
  25425. +
  25426. +out_reset:
  25427. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  25428. + next->timer_offset = 0;
  25429. +#endif
  25430. + put_task_struct(cpu_wakeup_task);
  25431. + per_cpu(wakeup_task, cpu) = NULL;
  25432. +out:
  25433. + raw_spin_unlock_irqrestore(&wakeup_lock, flags);
  25434. +}
  25435. +#endif
  25436. +
  25437. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  25438. +static notrace void probe_hrtimer_interrupt(void *v, int cpu,
  25439. + long long latency_ns, struct task_struct *curr,
  25440. + struct task_struct *task)
  25441. +{
  25442. + if (latency_ns <= 0 && task != NULL && rt_task(task) &&
  25443. + (task->prio < curr->prio ||
  25444. + (task->prio == curr->prio &&
  25445. + !cpumask_test_cpu(cpu, &task->cpus_allowed)))) {
  25446. + long latency;
  25447. + cycle_t now;
  25448. +
  25449. + if (missed_timer_offsets_pid) {
  25450. + if (likely(missed_timer_offsets_pid !=
  25451. + task_pid_nr(task)))
  25452. + return;
  25453. + }
  25454. +
  25455. + now = ftrace_now(cpu);
  25456. + latency = (long) div_s64(-latency_ns, NSECS_PER_USECS);
  25457. + latency_hist(MISSED_TIMER_OFFSETS, cpu, latency, latency, now,
  25458. + task);
  25459. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  25460. + task->timer_offset = latency;
  25461. +#endif
  25462. + }
  25463. +}
  25464. +#endif
  25465. +
  25466. +static __init int latency_hist_init(void)
  25467. +{
  25468. + struct dentry *latency_hist_root = NULL;
  25469. + struct dentry *dentry;
  25470. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  25471. + struct dentry *dentry_sharedprio;
  25472. +#endif
  25473. + struct dentry *entry;
  25474. + struct dentry *enable_root;
  25475. + int i = 0;
  25476. + struct hist_data *my_hist;
  25477. + char name[64];
  25478. + char *cpufmt = "CPU%d";
  25479. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  25480. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  25481. + char *cpufmt_maxlatproc = "max_latency-CPU%d";
  25482. + struct maxlatproc_data *mp = NULL;
  25483. +#endif
  25484. +
  25485. + dentry = tracing_init_dentry();
  25486. + latency_hist_root = debugfs_create_dir(latency_hist_dir_root, dentry);
  25487. + enable_root = debugfs_create_dir("enable", latency_hist_root);
  25488. +
  25489. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  25490. + dentry = debugfs_create_dir(irqsoff_hist_dir, latency_hist_root);
  25491. + for_each_possible_cpu(i) {
  25492. + sprintf(name, cpufmt, i);
  25493. + entry = debugfs_create_file(name, 0444, dentry,
  25494. + &per_cpu(irqsoff_hist, i), &latency_hist_fops);
  25495. + my_hist = &per_cpu(irqsoff_hist, i);
  25496. + atomic_set(&my_hist->hist_mode, 1);
  25497. + my_hist->min_lat = LONG_MAX;
  25498. + }
  25499. + entry = debugfs_create_file("reset", 0644, dentry,
  25500. + (void *)IRQSOFF_LATENCY, &latency_hist_reset_fops);
  25501. +#endif
  25502. +
  25503. +#ifdef CONFIG_PREEMPT_OFF_HIST
  25504. + dentry = debugfs_create_dir(preemptoff_hist_dir,
  25505. + latency_hist_root);
  25506. + for_each_possible_cpu(i) {
  25507. + sprintf(name, cpufmt, i);
  25508. + entry = debugfs_create_file(name, 0444, dentry,
  25509. + &per_cpu(preemptoff_hist, i), &latency_hist_fops);
  25510. + my_hist = &per_cpu(preemptoff_hist, i);
  25511. + atomic_set(&my_hist->hist_mode, 1);
  25512. + my_hist->min_lat = LONG_MAX;
  25513. + }
  25514. + entry = debugfs_create_file("reset", 0644, dentry,
  25515. + (void *)PREEMPTOFF_LATENCY, &latency_hist_reset_fops);
  25516. +#endif
  25517. +
  25518. +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
  25519. + dentry = debugfs_create_dir(preemptirqsoff_hist_dir,
  25520. + latency_hist_root);
  25521. + for_each_possible_cpu(i) {
  25522. + sprintf(name, cpufmt, i);
  25523. + entry = debugfs_create_file(name, 0444, dentry,
  25524. + &per_cpu(preemptirqsoff_hist, i), &latency_hist_fops);
  25525. + my_hist = &per_cpu(preemptirqsoff_hist, i);
  25526. + atomic_set(&my_hist->hist_mode, 1);
  25527. + my_hist->min_lat = LONG_MAX;
  25528. + }
  25529. + entry = debugfs_create_file("reset", 0644, dentry,
  25530. + (void *)PREEMPTIRQSOFF_LATENCY, &latency_hist_reset_fops);
  25531. +#endif
  25532. +
  25533. +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
  25534. + entry = debugfs_create_file("preemptirqsoff", 0644,
  25535. + enable_root, (void *)&preemptirqsoff_enabled_data,
  25536. + &enable_fops);
  25537. +#endif
  25538. +
  25539. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  25540. + dentry = debugfs_create_dir(wakeup_latency_hist_dir,
  25541. + latency_hist_root);
  25542. + dentry_sharedprio = debugfs_create_dir(
  25543. + wakeup_latency_hist_dir_sharedprio, dentry);
  25544. + for_each_possible_cpu(i) {
  25545. + sprintf(name, cpufmt, i);
  25546. +
  25547. + entry = debugfs_create_file(name, 0444, dentry,
  25548. + &per_cpu(wakeup_latency_hist, i),
  25549. + &latency_hist_fops);
  25550. + my_hist = &per_cpu(wakeup_latency_hist, i);
  25551. + atomic_set(&my_hist->hist_mode, 1);
  25552. + my_hist->min_lat = LONG_MAX;
  25553. +
  25554. + entry = debugfs_create_file(name, 0444, dentry_sharedprio,
  25555. + &per_cpu(wakeup_latency_hist_sharedprio, i),
  25556. + &latency_hist_fops);
  25557. + my_hist = &per_cpu(wakeup_latency_hist_sharedprio, i);
  25558. + atomic_set(&my_hist->hist_mode, 1);
  25559. + my_hist->min_lat = LONG_MAX;
  25560. +
  25561. + sprintf(name, cpufmt_maxlatproc, i);
  25562. +
  25563. + mp = &per_cpu(wakeup_maxlatproc, i);
  25564. + entry = debugfs_create_file(name, 0444, dentry, mp,
  25565. + &maxlatproc_fops);
  25566. + clear_maxlatprocdata(mp);
  25567. +
  25568. + mp = &per_cpu(wakeup_maxlatproc_sharedprio, i);
  25569. + entry = debugfs_create_file(name, 0444, dentry_sharedprio, mp,
  25570. + &maxlatproc_fops);
  25571. + clear_maxlatprocdata(mp);
  25572. + }
  25573. + entry = debugfs_create_file("pid", 0644, dentry,
  25574. + (void *)&wakeup_pid, &pid_fops);
  25575. + entry = debugfs_create_file("reset", 0644, dentry,
  25576. + (void *)WAKEUP_LATENCY, &latency_hist_reset_fops);
  25577. + entry = debugfs_create_file("reset", 0644, dentry_sharedprio,
  25578. + (void *)WAKEUP_LATENCY_SHAREDPRIO, &latency_hist_reset_fops);
  25579. + entry = debugfs_create_file("wakeup", 0644,
  25580. + enable_root, (void *)&wakeup_latency_enabled_data,
  25581. + &enable_fops);
  25582. +#endif
  25583. +
  25584. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  25585. + dentry = debugfs_create_dir(missed_timer_offsets_dir,
  25586. + latency_hist_root);
  25587. + for_each_possible_cpu(i) {
  25588. + sprintf(name, cpufmt, i);
  25589. + entry = debugfs_create_file(name, 0444, dentry,
  25590. + &per_cpu(missed_timer_offsets, i), &latency_hist_fops);
  25591. + my_hist = &per_cpu(missed_timer_offsets, i);
  25592. + atomic_set(&my_hist->hist_mode, 1);
  25593. + my_hist->min_lat = LONG_MAX;
  25594. +
  25595. + sprintf(name, cpufmt_maxlatproc, i);
  25596. + mp = &per_cpu(missed_timer_offsets_maxlatproc, i);
  25597. + entry = debugfs_create_file(name, 0444, dentry, mp,
  25598. + &maxlatproc_fops);
  25599. + clear_maxlatprocdata(mp);
  25600. + }
  25601. + entry = debugfs_create_file("pid", 0644, dentry,
  25602. + (void *)&missed_timer_offsets_pid, &pid_fops);
  25603. + entry = debugfs_create_file("reset", 0644, dentry,
  25604. + (void *)MISSED_TIMER_OFFSETS, &latency_hist_reset_fops);
  25605. + entry = debugfs_create_file("missed_timer_offsets", 0644,
  25606. + enable_root, (void *)&missed_timer_offsets_enabled_data,
  25607. + &enable_fops);
  25608. +#endif
  25609. +
  25610. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
  25611. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  25612. + dentry = debugfs_create_dir(timerandwakeup_latency_hist_dir,
  25613. + latency_hist_root);
  25614. + for_each_possible_cpu(i) {
  25615. + sprintf(name, cpufmt, i);
  25616. + entry = debugfs_create_file(name, 0444, dentry,
  25617. + &per_cpu(timerandwakeup_latency_hist, i),
  25618. + &latency_hist_fops);
  25619. + my_hist = &per_cpu(timerandwakeup_latency_hist, i);
  25620. + atomic_set(&my_hist->hist_mode, 1);
  25621. + my_hist->min_lat = LONG_MAX;
  25622. +
  25623. + sprintf(name, cpufmt_maxlatproc, i);
  25624. + mp = &per_cpu(timerandwakeup_maxlatproc, i);
  25625. + entry = debugfs_create_file(name, 0444, dentry, mp,
  25626. + &maxlatproc_fops);
  25627. + clear_maxlatprocdata(mp);
  25628. + }
  25629. + entry = debugfs_create_file("reset", 0644, dentry,
  25630. + (void *)TIMERANDWAKEUP_LATENCY, &latency_hist_reset_fops);
  25631. + entry = debugfs_create_file("timerandwakeup", 0644,
  25632. + enable_root, (void *)&timerandwakeup_enabled_data,
  25633. + &enable_fops);
  25634. +#endif
  25635. + return 0;
  25636. +}
  25637. +
  25638. +device_initcall(latency_hist_init);
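The latency_hist_init() initcall above only creates the debugfs plumbing: one "CPU%d" histogram file per possible CPU, a per-directory "reset" file, and the switches under "enable"; the histograms themselves are filled by the probe functions earlier in this file. As a rough illustration of how the resulting interface is consumed, a minimal user-space reader could look like the sketch below. The mount point and directory names are assumptions inferred from the code above, not something the patch spells out.

#include <stdio.h>

/*
 * Hypothetical reader for one per-CPU histogram file created by
 * latency_hist_init().  Assumes debugfs is mounted at /sys/kernel/debug
 * and that the directory names match those used in the code above.
 */
int main(void)
{
	const char *path = "/sys/kernel/debug/latency_hist/wakeup/CPU0";
	char line[256];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* copy the histogram text to stdout */
	fclose(f);
	return 0;
}

Writing to the corresponding "reset" file created above clears the counters again; the "enable" directory holds the switches that arm the probes in the first place.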
  25639. diff -Nur linux-4.4.62.orig/kernel/trace/Makefile linux-4.4.62/kernel/trace/Makefile
  25640. --- linux-4.4.62.orig/kernel/trace/Makefile 2017-04-18 07:15:37.000000000 +0200
  25641. +++ linux-4.4.62/kernel/trace/Makefile 2017-04-18 17:38:08.226650485 +0200
  25642. @@ -40,6 +40,10 @@
  25643. obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
  25644. obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
  25645. obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
  25646. +obj-$(CONFIG_INTERRUPT_OFF_HIST) += latency_hist.o
  25647. +obj-$(CONFIG_PREEMPT_OFF_HIST) += latency_hist.o
  25648. +obj-$(CONFIG_WAKEUP_LATENCY_HIST) += latency_hist.o
  25649. +obj-$(CONFIG_MISSED_TIMER_OFFSETS_HIST) += latency_hist.o
  25650. obj-$(CONFIG_NOP_TRACER) += trace_nop.o
  25651. obj-$(CONFIG_STACK_TRACER) += trace_stack.o
  25652. obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
  25653. diff -Nur linux-4.4.62.orig/kernel/trace/trace.c linux-4.4.62/kernel/trace/trace.c
  25654. --- linux-4.4.62.orig/kernel/trace/trace.c 2017-04-18 07:15:37.000000000 +0200
  25655. +++ linux-4.4.62/kernel/trace/trace.c 2017-04-18 17:38:08.226650485 +0200
  25656. @@ -1652,6 +1652,7 @@
  25657. struct task_struct *tsk = current;
  25658. entry->preempt_count = pc & 0xff;
  25659. + entry->preempt_lazy_count = preempt_lazy_count();
  25660. entry->pid = (tsk) ? tsk->pid : 0;
  25661. entry->flags =
  25662. #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
  25663. @@ -1661,8 +1662,11 @@
  25664. #endif
  25665. ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
  25666. ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
  25667. - (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
  25668. + (tif_need_resched_now() ? TRACE_FLAG_NEED_RESCHED : 0) |
  25669. + (need_resched_lazy() ? TRACE_FLAG_NEED_RESCHED_LAZY : 0) |
  25670. (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
  25671. +
  25672. + entry->migrate_disable = (tsk) ? __migrate_disabled(tsk) & 0xFF : 0;
  25673. }
  25674. EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
  25675. @@ -2555,14 +2559,17 @@
  25676. static void print_lat_help_header(struct seq_file *m)
  25677. {
  25678. - seq_puts(m, "# _------=> CPU# \n"
  25679. - "# / _-----=> irqs-off \n"
  25680. - "# | / _----=> need-resched \n"
  25681. - "# || / _---=> hardirq/softirq \n"
  25682. - "# ||| / _--=> preempt-depth \n"
  25683. - "# |||| / delay \n"
  25684. - "# cmd pid ||||| time | caller \n"
  25685. - "# \\ / ||||| \\ | / \n");
  25686. + seq_puts(m, "# _--------=> CPU# \n"
  25687. + "# / _-------=> irqs-off \n"
  25688. + "# | / _------=> need-resched \n"
  25689. + "# || / _-----=> need-resched_lazy \n"
  25690. + "# ||| / _----=> hardirq/softirq \n"
  25691. + "# |||| / _---=> preempt-depth \n"
  25692. + "# ||||| / _--=> preempt-lazy-depth\n"
  25693. + "# |||||| / _-=> migrate-disable \n"
  25694. + "# ||||||| / delay \n"
  25695. + "# cmd pid |||||||| time | caller \n"
  25696. + "# \\ / |||||||| \\ | / \n");
  25697. }
  25698. static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
  25699. @@ -2588,11 +2595,14 @@
  25700. print_event_info(buf, m);
  25701. seq_puts(m, "# _-----=> irqs-off\n"
  25702. "# / _----=> need-resched\n"
  25703. - "# | / _---=> hardirq/softirq\n"
  25704. - "# || / _--=> preempt-depth\n"
  25705. - "# ||| / delay\n"
  25706. - "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n"
  25707. - "# | | | |||| | |\n");
  25708. + "# |/ _-----=> need-resched_lazy\n"
  25709. + "# || / _---=> hardirq/softirq\n"
  25710. + "# ||| / _--=> preempt-depth\n"
  25711. + "# |||| / _-=> preempt-lazy-depth\n"
  25712. + "# ||||| / _-=> migrate-disable \n"
  25713. + "# |||||| / delay\n"
  25714. + "# TASK-PID CPU# ||||||| TIMESTAMP FUNCTION\n"
  25715. + "# | | | ||||||| | |\n");
  25716. }
  25717. void
  25718. diff -Nur linux-4.4.62.orig/kernel/trace/trace_events.c linux-4.4.62/kernel/trace/trace_events.c
  25719. --- linux-4.4.62.orig/kernel/trace/trace_events.c 2017-04-18 07:15:37.000000000 +0200
  25720. +++ linux-4.4.62/kernel/trace/trace_events.c 2017-04-18 17:38:08.226650485 +0200
  25721. @@ -188,6 +188,8 @@
  25722. __common_field(unsigned char, flags);
  25723. __common_field(unsigned char, preempt_count);
  25724. __common_field(int, pid);
  25725. + __common_field(unsigned short, migrate_disable);
  25726. + __common_field(unsigned short, padding);
  25727. return ret;
  25728. }
  25729. @@ -244,6 +246,14 @@
  25730. local_save_flags(fbuffer->flags);
  25731. fbuffer->pc = preempt_count();
  25732. + /*
  25733. + * If CONFIG_PREEMPT is enabled, then the tracepoint itself disables
  25734. + * preemption (adding one to the preempt_count). Since we are
  25735. + * interested in the preempt_count at the time the tracepoint was
  25736. + * hit, we need to subtract one to offset the increment.
  25737. + */
  25738. + if (IS_ENABLED(CONFIG_PREEMPT))
  25739. + fbuffer->pc--;
  25740. fbuffer->trace_file = trace_file;
  25741. fbuffer->event =
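The comment added above is the whole story of this hunk: with CONFIG_PREEMPT the tracepoint wrapper has already done its own preempt_disable() by the time the ring-buffer entry is set up, so the recorded count would be one too high. A reduced sketch of the adjustment, pulled out of context (illustrative only, not the real trace_event_buffer_reserve() code):

#include <linux/preempt.h>

/* Illustrative helper, not part of the patch. */
static inline int call_site_preempt_count(void)
{
	int pc = preempt_count();	/* includes the tracepoint wrapper's own disable */

	if (IS_ENABLED(CONFIG_PREEMPT))
		pc--;			/* report the depth the traced code actually ran at */
	return pc;
}

So a tracepoint hit from otherwise preemptible process context is recorded with a preempt depth of 0 rather than 1.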
  25742. diff -Nur linux-4.4.62.orig/kernel/trace/trace.h linux-4.4.62/kernel/trace/trace.h
  25743. --- linux-4.4.62.orig/kernel/trace/trace.h 2017-04-18 07:15:37.000000000 +0200
  25744. +++ linux-4.4.62/kernel/trace/trace.h 2017-04-18 17:38:08.226650485 +0200
  25745. @@ -117,6 +117,7 @@
  25746. * NEED_RESCHED - reschedule is requested
  25747. * HARDIRQ - inside an interrupt handler
  25748. * SOFTIRQ - inside a softirq handler
  25749. + * NEED_RESCHED_LAZY - lazy reschedule is requested
  25750. */
  25751. enum trace_flag_type {
  25752. TRACE_FLAG_IRQS_OFF = 0x01,
  25753. @@ -125,6 +126,7 @@
  25754. TRACE_FLAG_HARDIRQ = 0x08,
  25755. TRACE_FLAG_SOFTIRQ = 0x10,
  25756. TRACE_FLAG_PREEMPT_RESCHED = 0x20,
  25757. + TRACE_FLAG_NEED_RESCHED_LAZY = 0x40,
  25758. };
  25759. #define TRACE_BUF_SIZE 1024
  25760. diff -Nur linux-4.4.62.orig/kernel/trace/trace_irqsoff.c linux-4.4.62/kernel/trace/trace_irqsoff.c
  25761. --- linux-4.4.62.orig/kernel/trace/trace_irqsoff.c 2017-04-18 07:15:37.000000000 +0200
  25762. +++ linux-4.4.62/kernel/trace/trace_irqsoff.c 2017-04-18 17:38:08.226650485 +0200
  25763. @@ -13,6 +13,7 @@
  25764. #include <linux/uaccess.h>
  25765. #include <linux/module.h>
  25766. #include <linux/ftrace.h>
  25767. +#include <trace/events/hist.h>
  25768. #include "trace.h"
  25769. @@ -424,11 +425,13 @@
  25770. {
  25771. if (preempt_trace() || irq_trace())
  25772. start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
  25773. + trace_preemptirqsoff_hist_rcuidle(TRACE_START, 1);
  25774. }
  25775. EXPORT_SYMBOL_GPL(start_critical_timings);
  25776. void stop_critical_timings(void)
  25777. {
  25778. + trace_preemptirqsoff_hist_rcuidle(TRACE_STOP, 0);
  25779. if (preempt_trace() || irq_trace())
  25780. stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
  25781. }
  25782. @@ -438,6 +441,7 @@
  25783. #ifdef CONFIG_PROVE_LOCKING
  25784. void time_hardirqs_on(unsigned long a0, unsigned long a1)
  25785. {
  25786. + trace_preemptirqsoff_hist_rcuidle(IRQS_ON, 0);
  25787. if (!preempt_trace() && irq_trace())
  25788. stop_critical_timing(a0, a1);
  25789. }
  25790. @@ -446,6 +450,7 @@
  25791. {
  25792. if (!preempt_trace() && irq_trace())
  25793. start_critical_timing(a0, a1);
  25794. + trace_preemptirqsoff_hist_rcuidle(IRQS_OFF, 1);
  25795. }
  25796. #else /* !CONFIG_PROVE_LOCKING */
  25797. @@ -471,6 +476,7 @@
  25798. */
  25799. void trace_hardirqs_on(void)
  25800. {
  25801. + trace_preemptirqsoff_hist(IRQS_ON, 0);
  25802. if (!preempt_trace() && irq_trace())
  25803. stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
  25804. }
  25805. @@ -480,11 +486,13 @@
  25806. {
  25807. if (!preempt_trace() && irq_trace())
  25808. start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
  25809. + trace_preemptirqsoff_hist(IRQS_OFF, 1);
  25810. }
  25811. EXPORT_SYMBOL(trace_hardirqs_off);
  25812. __visible void trace_hardirqs_on_caller(unsigned long caller_addr)
  25813. {
  25814. + trace_preemptirqsoff_hist(IRQS_ON, 0);
  25815. if (!preempt_trace() && irq_trace())
  25816. stop_critical_timing(CALLER_ADDR0, caller_addr);
  25817. }
  25818. @@ -494,6 +502,7 @@
  25819. {
  25820. if (!preempt_trace() && irq_trace())
  25821. start_critical_timing(CALLER_ADDR0, caller_addr);
  25822. + trace_preemptirqsoff_hist(IRQS_OFF, 1);
  25823. }
  25824. EXPORT_SYMBOL(trace_hardirqs_off_caller);
  25825. @@ -503,12 +512,14 @@
  25826. #ifdef CONFIG_PREEMPT_TRACER
  25827. void trace_preempt_on(unsigned long a0, unsigned long a1)
  25828. {
  25829. + trace_preemptirqsoff_hist(PREEMPT_ON, 0);
  25830. if (preempt_trace() && !irq_trace())
  25831. stop_critical_timing(a0, a1);
  25832. }
  25833. void trace_preempt_off(unsigned long a0, unsigned long a1)
  25834. {
  25835. + trace_preemptirqsoff_hist(PREEMPT_ON, 1);
  25836. if (preempt_trace() && !irq_trace())
  25837. start_critical_timing(a0, a1);
  25838. }
  25839. diff -Nur linux-4.4.62.orig/kernel/trace/trace_output.c linux-4.4.62/kernel/trace/trace_output.c
  25840. --- linux-4.4.62.orig/kernel/trace/trace_output.c 2017-04-18 07:15:37.000000000 +0200
  25841. +++ linux-4.4.62/kernel/trace/trace_output.c 2017-04-18 17:38:08.226650485 +0200
  25842. @@ -386,6 +386,7 @@
  25843. {
  25844. char hardsoft_irq;
  25845. char need_resched;
  25846. + char need_resched_lazy;
  25847. char irqs_off;
  25848. int hardirq;
  25849. int softirq;
  25850. @@ -413,6 +414,8 @@
  25851. need_resched = '.';
  25852. break;
  25853. }
  25854. + need_resched_lazy =
  25855. + (entry->flags & TRACE_FLAG_NEED_RESCHED_LAZY) ? 'L' : '.';
  25856. hardsoft_irq =
  25857. (hardirq && softirq) ? 'H' :
  25858. @@ -420,14 +423,25 @@
  25859. softirq ? 's' :
  25860. '.';
  25861. - trace_seq_printf(s, "%c%c%c",
  25862. - irqs_off, need_resched, hardsoft_irq);
  25863. + trace_seq_printf(s, "%c%c%c%c",
  25864. + irqs_off, need_resched, need_resched_lazy,
  25865. + hardsoft_irq);
  25866. if (entry->preempt_count)
  25867. trace_seq_printf(s, "%x", entry->preempt_count);
  25868. else
  25869. trace_seq_putc(s, '.');
  25870. + if (entry->preempt_lazy_count)
  25871. + trace_seq_printf(s, "%x", entry->preempt_lazy_count);
  25872. + else
  25873. + trace_seq_putc(s, '.');
  25874. +
  25875. + if (entry->migrate_disable)
  25876. + trace_seq_printf(s, "%x", entry->migrate_disable);
  25877. + else
  25878. + trace_seq_putc(s, '.');
  25879. +
  25880. return !trace_seq_has_overflowed(s);
  25881. }
  25882. diff -Nur linux-4.4.62.orig/kernel/user.c linux-4.4.62/kernel/user.c
  25883. --- linux-4.4.62.orig/kernel/user.c 2017-04-18 07:15:37.000000000 +0200
  25884. +++ linux-4.4.62/kernel/user.c 2017-04-18 17:38:08.226650485 +0200
  25885. @@ -161,11 +161,11 @@
  25886. if (!up)
  25887. return;
  25888. - local_irq_save(flags);
  25889. + local_irq_save_nort(flags);
  25890. if (atomic_dec_and_lock(&up->__count, &uidhash_lock))
  25891. free_user(up, flags);
  25892. else
  25893. - local_irq_restore(flags);
  25894. + local_irq_restore_nort(flags);
  25895. }
  25896. struct user_struct *alloc_uid(kuid_t uid)
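The only change to kernel/user.c is switching free_uid() from local_irq_save()/local_irq_restore() to the _nort variants introduced by the include/linux/irqflags.h hunk earlier in this patch. Roughly, the intent is the sketch below: on a non-RT kernel nothing changes, while on PREEMPT_RT_FULL the IRQ disabling is dropped because uidhash_lock becomes a sleeping lock there. The exact macro bodies live in that irqflags.h hunk and may differ in detail from this sketch.

/* Sketch of the intent only; see the irqflags.h hunk for the real definitions. */
#ifdef CONFIG_PREEMPT_RT_FULL
# define local_irq_save_nort(flags)	do { local_save_flags(flags); } while (0)
# define local_irq_restore_nort(flags)	do { (void)(flags); } while (0)
#else
# define local_irq_save_nort(flags)	local_irq_save(flags)
# define local_irq_restore_nort(flags)	local_irq_restore(flags)
#endif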
  25897. diff -Nur linux-4.4.62.orig/kernel/watchdog.c linux-4.4.62/kernel/watchdog.c
  25898. --- linux-4.4.62.orig/kernel/watchdog.c 2017-04-18 07:15:37.000000000 +0200
  25899. +++ linux-4.4.62/kernel/watchdog.c 2017-04-18 17:38:08.226650485 +0200
  25900. @@ -299,6 +299,8 @@
  25901. #ifdef CONFIG_HARDLOCKUP_DETECTOR
  25902. +static DEFINE_RAW_SPINLOCK(watchdog_output_lock);
  25903. +
  25904. static struct perf_event_attr wd_hw_attr = {
  25905. .type = PERF_TYPE_HARDWARE,
  25906. .config = PERF_COUNT_HW_CPU_CYCLES,
  25907. @@ -332,6 +334,13 @@
  25908. /* only print hardlockups once */
  25909. if (__this_cpu_read(hard_watchdog_warn) == true)
  25910. return;
  25911. + /*
  25912. + * If early-printk is enabled then make sure we do not
  25913. + * lock up in printk() and kill console logging:
  25914. + */
  25915. + printk_kill();
  25916. +
  25917. + raw_spin_lock(&watchdog_output_lock);
  25918. pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
  25919. print_modules();
  25920. @@ -349,8 +358,9 @@
  25921. !test_and_set_bit(0, &hardlockup_allcpu_dumped))
  25922. trigger_allbutself_cpu_backtrace();
  25923. + raw_spin_unlock(&watchdog_output_lock);
  25924. if (hardlockup_panic)
  25925. - panic("Hard LOCKUP");
  25926. + nmi_panic(regs, "Hard LOCKUP");
  25927. __this_cpu_write(hard_watchdog_warn, true);
  25928. return;
  25929. @@ -496,6 +506,7 @@
  25930. /* kick off the timer for the hardlockup detector */
  25931. hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  25932. hrtimer->function = watchdog_timer_fn;
  25933. + hrtimer->irqsafe = 1;
  25934. /* Enable the perf event */
  25935. watchdog_nmi_enable(cpu);
  25936. diff -Nur linux-4.4.62.orig/kernel/workqueue.c linux-4.4.62/kernel/workqueue.c
  25937. --- linux-4.4.62.orig/kernel/workqueue.c 2017-04-18 07:15:37.000000000 +0200
  25938. +++ linux-4.4.62/kernel/workqueue.c 2017-04-18 17:38:08.230650641 +0200
  25939. @@ -48,6 +48,8 @@
  25940. #include <linux/nodemask.h>
  25941. #include <linux/moduleparam.h>
  25942. #include <linux/uaccess.h>
  25943. +#include <linux/locallock.h>
  25944. +#include <linux/delay.h>
  25945. #include "workqueue_internal.h"
  25946. @@ -121,11 +123,16 @@
  25947. * cpu or grabbing pool->lock is enough for read access. If
  25948. * POOL_DISASSOCIATED is set, it's identical to L.
  25949. *
  25950. + * On RT we need the extra protection via rt_lock_idle_list() for
  25951. + * the list manipulations against read access from
  25952. + * wq_worker_sleeping(). All other places are nicely serialized via
  25953. + * pool->lock.
  25954. + *
  25955. * A: pool->attach_mutex protected.
  25956. *
  25957. * PL: wq_pool_mutex protected.
  25958. *
  25959. - * PR: wq_pool_mutex protected for writes. Sched-RCU protected for reads.
  25960. + * PR: wq_pool_mutex protected for writes. RCU protected for reads.
  25961. *
  25962. * PW: wq_pool_mutex and wq->mutex protected for writes. Either for reads.
  25963. *
  25964. @@ -134,7 +141,7 @@
  25965. *
  25966. * WQ: wq->mutex protected.
  25967. *
  25968. - * WR: wq->mutex protected for writes. Sched-RCU protected for reads.
  25969. + * WR: wq->mutex protected for writes. RCU protected for reads.
  25970. *
  25971. * MD: wq_mayday_lock protected.
  25972. */
  25973. @@ -183,7 +190,7 @@
  25974. atomic_t nr_running ____cacheline_aligned_in_smp;
  25975. /*
  25976. - * Destruction of pool is sched-RCU protected to allow dereferences
  25977. + * Destruction of pool is RCU protected to allow dereferences
  25978. * from get_work_pool().
  25979. */
  25980. struct rcu_head rcu;
  25981. @@ -212,7 +219,7 @@
  25982. /*
  25983. * Release of unbound pwq is punted to system_wq. See put_pwq()
  25984. * and pwq_unbound_release_workfn() for details. pool_workqueue
  25985. - * itself is also sched-RCU protected so that the first pwq can be
  25986. + * itself is also RCU protected so that the first pwq can be
  25987. * determined without grabbing wq->mutex.
  25988. */
  25989. struct work_struct unbound_release_work;
  25990. @@ -331,6 +338,8 @@
  25991. struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly;
  25992. EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);
  25993. +static DEFINE_LOCAL_IRQ_LOCK(pendingb_lock);
  25994. +
  25995. static int worker_thread(void *__worker);
  25996. static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
  25997. @@ -338,20 +347,20 @@
  25998. #include <trace/events/workqueue.h>
  25999. #define assert_rcu_or_pool_mutex() \
  26000. - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
  26001. + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
  26002. !lockdep_is_held(&wq_pool_mutex), \
  26003. - "sched RCU or wq_pool_mutex should be held")
  26004. + "RCU or wq_pool_mutex should be held")
  26005. #define assert_rcu_or_wq_mutex(wq) \
  26006. - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
  26007. + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
  26008. !lockdep_is_held(&wq->mutex), \
  26009. - "sched RCU or wq->mutex should be held")
  26010. + "RCU or wq->mutex should be held")
  26011. #define assert_rcu_or_wq_mutex_or_pool_mutex(wq) \
  26012. - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
  26013. + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
  26014. !lockdep_is_held(&wq->mutex) && \
  26015. !lockdep_is_held(&wq_pool_mutex), \
  26016. - "sched RCU, wq->mutex or wq_pool_mutex should be held")
  26017. + "RCU, wq->mutex or wq_pool_mutex should be held")
  26018. #define for_each_cpu_worker_pool(pool, cpu) \
  26019. for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \
  26020. @@ -363,7 +372,7 @@
  26021. * @pool: iteration cursor
  26022. * @pi: integer used for iteration
  26023. *
  26024. - * This must be called either with wq_pool_mutex held or sched RCU read
  26025. + * This must be called either with wq_pool_mutex held or RCU read
  26026. * locked. If the pool needs to be used beyond the locking in effect, the
  26027. * caller is responsible for guaranteeing that the pool stays online.
  26028. *
  26029. @@ -395,7 +404,7 @@
  26030. * @pwq: iteration cursor
  26031. * @wq: the target workqueue
  26032. *
  26033. - * This must be called either with wq->mutex held or sched RCU read locked.
  26034. + * This must be called either with wq->mutex held or RCU read locked.
  26035. * If the pwq needs to be used beyond the locking in effect, the caller is
  26036. * responsible for guaranteeing that the pwq stays online.
  26037. *
  26038. @@ -407,6 +416,31 @@
  26039. if (({ assert_rcu_or_wq_mutex(wq); false; })) { } \
  26040. else
  26041. +#ifdef CONFIG_PREEMPT_RT_BASE
  26042. +static inline void rt_lock_idle_list(struct worker_pool *pool)
  26043. +{
  26044. + preempt_disable();
  26045. +}
  26046. +static inline void rt_unlock_idle_list(struct worker_pool *pool)
  26047. +{
  26048. + preempt_enable();
  26049. +}
  26050. +static inline void sched_lock_idle_list(struct worker_pool *pool) { }
  26051. +static inline void sched_unlock_idle_list(struct worker_pool *pool) { }
  26052. +#else
  26053. +static inline void rt_lock_idle_list(struct worker_pool *pool) { }
  26054. +static inline void rt_unlock_idle_list(struct worker_pool *pool) { }
  26055. +static inline void sched_lock_idle_list(struct worker_pool *pool)
  26056. +{
  26057. + spin_lock_irq(&pool->lock);
  26058. +}
  26059. +static inline void sched_unlock_idle_list(struct worker_pool *pool)
  26060. +{
  26061. + spin_unlock_irq(&pool->lock);
  26062. +}
  26063. +#endif
  26064. +
  26065. +
  26066. #ifdef CONFIG_DEBUG_OBJECTS_WORK
  26067. static struct debug_obj_descr work_debug_descr;
  26068. @@ -557,7 +591,7 @@
  26069. * @wq: the target workqueue
  26070. * @node: the node ID
  26071. *
  26072. - * This must be called with any of wq_pool_mutex, wq->mutex or sched RCU
  26073. + * This must be called with any of wq_pool_mutex, wq->mutex or RCU
  26074. * read locked.
  26075. * If the pwq needs to be used beyond the locking in effect, the caller is
  26076. * responsible for guaranteeing that the pwq stays online.
  26077. @@ -701,8 +735,8 @@
  26078. * @work: the work item of interest
  26079. *
  26080. * Pools are created and destroyed under wq_pool_mutex, and allows read
  26081. - * access under sched-RCU read lock. As such, this function should be
  26082. - * called under wq_pool_mutex or with preemption disabled.
  26083. + * access under RCU read lock. As such, this function should be
  26084. + * called under wq_pool_mutex or inside of a rcu_read_lock() region.
  26085. *
  26086. * All fields of the returned pool are accessible as long as the above
  26087. * mentioned locking is in effect. If the returned pool needs to be used
  26088. @@ -839,51 +873,44 @@
  26089. */
  26090. static void wake_up_worker(struct worker_pool *pool)
  26091. {
  26092. - struct worker *worker = first_idle_worker(pool);
  26093. + struct worker *worker;
  26094. +
  26095. + rt_lock_idle_list(pool);
  26096. +
  26097. + worker = first_idle_worker(pool);
  26098. if (likely(worker))
  26099. wake_up_process(worker->task);
  26100. +
  26101. + rt_unlock_idle_list(pool);
  26102. }
  26103. /**
  26104. - * wq_worker_waking_up - a worker is waking up
  26105. - * @task: task waking up
  26106. - * @cpu: CPU @task is waking up to
  26107. + * wq_worker_running - a worker is running again
  26108. + * @task: task returning from sleep
  26109. *
  26110. - * This function is called during try_to_wake_up() when a worker is
  26111. - * being awoken.
  26112. - *
  26113. - * CONTEXT:
  26114. - * spin_lock_irq(rq->lock)
  26115. + * This function is called when a worker returns from schedule()
  26116. */
  26117. -void wq_worker_waking_up(struct task_struct *task, int cpu)
  26118. +void wq_worker_running(struct task_struct *task)
  26119. {
  26120. struct worker *worker = kthread_data(task);
  26121. - if (!(worker->flags & WORKER_NOT_RUNNING)) {
  26122. - WARN_ON_ONCE(worker->pool->cpu != cpu);
  26123. + if (!worker->sleeping)
  26124. + return;
  26125. + if (!(worker->flags & WORKER_NOT_RUNNING))
  26126. atomic_inc(&worker->pool->nr_running);
  26127. - }
  26128. + worker->sleeping = 0;
  26129. }
  26130. /**
  26131. * wq_worker_sleeping - a worker is going to sleep
  26132. * @task: task going to sleep
  26133. - * @cpu: CPU in question, must be the current CPU number
  26134. - *
  26135. - * This function is called during schedule() when a busy worker is
  26136. - * going to sleep. Worker on the same cpu can be woken up by
  26137. - * returning pointer to its task.
  26138. - *
  26139. - * CONTEXT:
  26140. - * spin_lock_irq(rq->lock)
  26141. - *
  26142. - * Return:
  26143. - * Worker task on @cpu to wake up, %NULL if none.
  26144. + * This function is called from schedule() when a busy worker is
  26145. + * going to sleep.
  26146. */
  26147. -struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu)
  26148. +void wq_worker_sleeping(struct task_struct *task)
  26149. {
  26150. - struct worker *worker = kthread_data(task), *to_wakeup = NULL;
  26151. + struct worker *worker = kthread_data(task);
  26152. struct worker_pool *pool;
  26153. /*
  26154. @@ -892,29 +919,26 @@
  26155. * checking NOT_RUNNING.
  26156. */
  26157. if (worker->flags & WORKER_NOT_RUNNING)
  26158. - return NULL;
  26159. + return;
  26160. pool = worker->pool;
  26161. - /* this can only happen on the local cpu */
  26162. - if (WARN_ON_ONCE(cpu != raw_smp_processor_id() || pool->cpu != cpu))
  26163. - return NULL;
  26164. + if (WARN_ON_ONCE(worker->sleeping))
  26165. + return;
  26166. +
  26167. + worker->sleeping = 1;
  26168. /*
  26169. * The counterpart of the following dec_and_test, implied mb,
  26170. * worklist not empty test sequence is in insert_work().
  26171. * Please read comment there.
  26172. - *
  26173. - * NOT_RUNNING is clear. This means that we're bound to and
  26174. - * running on the local cpu w/ rq lock held and preemption
  26175. - * disabled, which in turn means that none else could be
  26176. - * manipulating idle_list, so dereferencing idle_list without pool
  26177. - * lock is safe.
  26178. */
  26179. if (atomic_dec_and_test(&pool->nr_running) &&
  26180. - !list_empty(&pool->worklist))
  26181. - to_wakeup = first_idle_worker(pool);
  26182. - return to_wakeup ? to_wakeup->task : NULL;
  26183. + !list_empty(&pool->worklist)) {
  26184. + sched_lock_idle_list(pool);
  26185. + wake_up_worker(pool);
  26186. + sched_unlock_idle_list(pool);
  26187. + }
  26188. }
  26189. /**
  26190. @@ -1108,12 +1132,14 @@
  26191. {
  26192. if (pwq) {
  26193. /*
  26194. - * As both pwqs and pools are sched-RCU protected, the
  26195. + * As both pwqs and pools are RCU protected, the
  26196. * following lock operations are safe.
  26197. */
  26198. - spin_lock_irq(&pwq->pool->lock);
  26199. + rcu_read_lock();
  26200. + local_spin_lock_irq(pendingb_lock, &pwq->pool->lock);
  26201. put_pwq(pwq);
  26202. - spin_unlock_irq(&pwq->pool->lock);
  26203. + local_spin_unlock_irq(pendingb_lock, &pwq->pool->lock);
  26204. + rcu_read_unlock();
  26205. }
  26206. }
  26207. @@ -1215,7 +1241,7 @@
  26208. struct worker_pool *pool;
  26209. struct pool_workqueue *pwq;
  26210. - local_irq_save(*flags);
  26211. + local_lock_irqsave(pendingb_lock, *flags);
  26212. /* try to steal the timer if it exists */
  26213. if (is_dwork) {
  26214. @@ -1234,6 +1260,7 @@
  26215. if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
  26216. return 0;
  26217. + rcu_read_lock();
  26218. /*
  26219. * The queueing is in progress, or it is already queued. Try to
  26220. * steal it from ->worklist without clearing WORK_STRUCT_PENDING.
  26221. @@ -1272,14 +1299,16 @@
  26222. set_work_pool_and_keep_pending(work, pool->id);
  26223. spin_unlock(&pool->lock);
  26224. + rcu_read_unlock();
  26225. return 1;
  26226. }
  26227. spin_unlock(&pool->lock);
  26228. fail:
  26229. - local_irq_restore(*flags);
  26230. + rcu_read_unlock();
  26231. + local_unlock_irqrestore(pendingb_lock, *flags);
  26232. if (work_is_canceling(work))
  26233. return -ENOENT;
  26234. - cpu_relax();
  26235. + cpu_chill();
  26236. return -EAGAIN;
  26237. }
  26238. @@ -1348,7 +1377,7 @@
  26239. * queued or lose PENDING. Grabbing PENDING and queueing should
  26240. * happen with IRQ disabled.
  26241. */
  26242. - WARN_ON_ONCE(!irqs_disabled());
  26243. + WARN_ON_ONCE_NONRT(!irqs_disabled());
  26244. debug_work_activate(work);
  26245. @@ -1356,6 +1385,8 @@
  26246. if (unlikely(wq->flags & __WQ_DRAINING) &&
  26247. WARN_ON_ONCE(!is_chained_work(wq)))
  26248. return;
  26249. +
  26250. + rcu_read_lock();
  26251. retry:
  26252. if (req_cpu == WORK_CPU_UNBOUND)
  26253. cpu = raw_smp_processor_id();
  26254. @@ -1412,10 +1443,8 @@
  26255. /* pwq determined, queue */
  26256. trace_workqueue_queue_work(req_cpu, pwq, work);
  26257. - if (WARN_ON(!list_empty(&work->entry))) {
  26258. - spin_unlock(&pwq->pool->lock);
  26259. - return;
  26260. - }
  26261. + if (WARN_ON(!list_empty(&work->entry)))
  26262. + goto out;
  26263. pwq->nr_in_flight[pwq->work_color]++;
  26264. work_flags = work_color_to_flags(pwq->work_color);
  26265. @@ -1431,7 +1460,9 @@
  26266. insert_work(pwq, work, worklist, work_flags);
  26267. +out:
  26268. spin_unlock(&pwq->pool->lock);
  26269. + rcu_read_unlock();
  26270. }
  26271. /**
  26272. @@ -1451,14 +1482,14 @@
  26273. bool ret = false;
  26274. unsigned long flags;
  26275. - local_irq_save(flags);
  26276. + local_lock_irqsave(pendingb_lock,flags);
  26277. if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
  26278. __queue_work(cpu, wq, work);
  26279. ret = true;
  26280. }
  26281. - local_irq_restore(flags);
  26282. + local_unlock_irqrestore(pendingb_lock, flags);
  26283. return ret;
  26284. }
  26285. EXPORT_SYMBOL(queue_work_on);
  26286. @@ -1525,14 +1556,14 @@
  26287. unsigned long flags;
  26288. /* read the comment in __queue_work() */
  26289. - local_irq_save(flags);
  26290. + local_lock_irqsave(pendingb_lock, flags);
  26291. if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
  26292. __queue_delayed_work(cpu, wq, dwork, delay);
  26293. ret = true;
  26294. }
  26295. - local_irq_restore(flags);
  26296. + local_unlock_irqrestore(pendingb_lock, flags);
  26297. return ret;
  26298. }
  26299. EXPORT_SYMBOL(queue_delayed_work_on);
  26300. @@ -1567,7 +1598,7 @@
  26301. if (likely(ret >= 0)) {
  26302. __queue_delayed_work(cpu, wq, dwork, delay);
  26303. - local_irq_restore(flags);
  26304. + local_unlock_irqrestore(pendingb_lock, flags);
  26305. }
  26306. /* -ENOENT from try_to_grab_pending() becomes %true */
  26307. @@ -1600,7 +1631,9 @@
  26308. worker->last_active = jiffies;
  26309. /* idle_list is LIFO */
  26310. + rt_lock_idle_list(pool);
  26311. list_add(&worker->entry, &pool->idle_list);
  26312. + rt_unlock_idle_list(pool);
  26313. if (too_many_workers(pool) && !timer_pending(&pool->idle_timer))
  26314. mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT);
  26315. @@ -1633,7 +1666,9 @@
  26316. return;
  26317. worker_clr_flags(worker, WORKER_IDLE);
  26318. pool->nr_idle--;
  26319. + rt_lock_idle_list(pool);
  26320. list_del_init(&worker->entry);
  26321. + rt_unlock_idle_list(pool);
  26322. }
  26323. static struct worker *alloc_worker(int node)
  26324. @@ -1799,7 +1834,9 @@
  26325. pool->nr_workers--;
  26326. pool->nr_idle--;
  26327. + rt_lock_idle_list(pool);
  26328. list_del_init(&worker->entry);
  26329. + rt_unlock_idle_list(pool);
  26330. worker->flags |= WORKER_DIE;
  26331. wake_up_process(worker->task);
  26332. }
  26333. @@ -2716,14 +2753,14 @@
  26334. might_sleep();
  26335. - local_irq_disable();
  26336. + rcu_read_lock();
  26337. pool = get_work_pool(work);
  26338. if (!pool) {
  26339. - local_irq_enable();
  26340. + rcu_read_unlock();
  26341. return false;
  26342. }
  26343. - spin_lock(&pool->lock);
  26344. + spin_lock_irq(&pool->lock);
  26345. /* see the comment in try_to_grab_pending() with the same code */
  26346. pwq = get_work_pwq(work);
  26347. if (pwq) {
  26348. @@ -2750,10 +2787,11 @@
  26349. else
  26350. lock_map_acquire_read(&pwq->wq->lockdep_map);
  26351. lock_map_release(&pwq->wq->lockdep_map);
  26352. -
  26353. + rcu_read_unlock();
  26354. return true;
  26355. already_gone:
  26356. spin_unlock_irq(&pool->lock);
  26357. + rcu_read_unlock();
  26358. return false;
  26359. }
  26360. @@ -2840,7 +2878,7 @@
  26361. /* tell other tasks trying to grab @work to back off */
  26362. mark_work_canceling(work);
  26363. - local_irq_restore(flags);
  26364. + local_unlock_irqrestore(pendingb_lock, flags);
  26365. flush_work(work);
  26366. clear_work_data(work);
  26367. @@ -2895,10 +2933,10 @@
  26368. */
  26369. bool flush_delayed_work(struct delayed_work *dwork)
  26370. {
  26371. - local_irq_disable();
  26372. + local_lock_irq(pendingb_lock);
  26373. if (del_timer_sync(&dwork->timer))
  26374. __queue_work(dwork->cpu, dwork->wq, &dwork->work);
  26375. - local_irq_enable();
  26376. + local_unlock_irq(pendingb_lock);
  26377. return flush_work(&dwork->work);
  26378. }
  26379. EXPORT_SYMBOL(flush_delayed_work);
  26380. @@ -2933,7 +2971,7 @@
  26381. set_work_pool_and_clear_pending(&dwork->work,
  26382. get_work_pool_id(&dwork->work));
  26383. - local_irq_restore(flags);
  26384. + local_unlock_irqrestore(pendingb_lock, flags);
  26385. return ret;
  26386. }
  26387. EXPORT_SYMBOL(cancel_delayed_work);
  26388. @@ -3161,7 +3199,7 @@
  26389. * put_unbound_pool - put a worker_pool
  26390. * @pool: worker_pool to put
  26391. *
  26392. - * Put @pool. If its refcnt reaches zero, it gets destroyed in sched-RCU
  26393. + * Put @pool. If its refcnt reaches zero, it gets destroyed in RCU
  26394. * safe manner. get_unbound_pool() calls this function on its failure path
  26395. * and this function should be able to release pools which went through,
  26396. * successfully or not, init_worker_pool().
  26397. @@ -3215,8 +3253,8 @@
  26398. del_timer_sync(&pool->idle_timer);
  26399. del_timer_sync(&pool->mayday_timer);
  26400. - /* sched-RCU protected to allow dereferences from get_work_pool() */
  26401. - call_rcu_sched(&pool->rcu, rcu_free_pool);
  26402. + /* RCU protected to allow dereferences from get_work_pool() */
  26403. + call_rcu(&pool->rcu, rcu_free_pool);
  26404. }
  26405. /**
  26406. @@ -3323,14 +3361,14 @@
  26407. put_unbound_pool(pool);
  26408. mutex_unlock(&wq_pool_mutex);
  26409. - call_rcu_sched(&pwq->rcu, rcu_free_pwq);
  26410. + call_rcu(&pwq->rcu, rcu_free_pwq);
  26411. /*
  26412. * If we're the last pwq going away, @wq is already dead and no one
  26413. * is gonna access it anymore. Schedule RCU free.
  26414. */
  26415. if (is_last)
  26416. - call_rcu_sched(&wq->rcu, rcu_free_wq);
  26417. + call_rcu(&wq->rcu, rcu_free_wq);
  26418. }
  26419. /**
  26420. @@ -3983,7 +4021,7 @@
  26421. * The base ref is never dropped on per-cpu pwqs. Directly
  26422. * schedule RCU free.
  26423. */
  26424. - call_rcu_sched(&wq->rcu, rcu_free_wq);
  26425. + call_rcu(&wq->rcu, rcu_free_wq);
  26426. } else {
  26427. /*
  26428. * We're the sole accessor of @wq at this point. Directly
  26429. @@ -4076,7 +4114,8 @@
  26430. struct pool_workqueue *pwq;
  26431. bool ret;
  26432. - rcu_read_lock_sched();
  26433. + rcu_read_lock();
  26434. + preempt_disable();
  26435. if (cpu == WORK_CPU_UNBOUND)
  26436. cpu = smp_processor_id();
  26437. @@ -4087,7 +4126,8 @@
  26438. pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
  26439. ret = !list_empty(&pwq->delayed_works);
  26440. - rcu_read_unlock_sched();
  26441. + preempt_enable();
  26442. + rcu_read_unlock();
  26443. return ret;
  26444. }
  26445. @@ -4113,15 +4153,15 @@
  26446. if (work_pending(work))
  26447. ret |= WORK_BUSY_PENDING;
  26448. - local_irq_save(flags);
  26449. + rcu_read_lock();
  26450. pool = get_work_pool(work);
  26451. if (pool) {
  26452. - spin_lock(&pool->lock);
  26453. + spin_lock_irqsave(&pool->lock, flags);
  26454. if (find_worker_executing_work(pool, work))
  26455. ret |= WORK_BUSY_RUNNING;
  26456. - spin_unlock(&pool->lock);
  26457. + spin_unlock_irqrestore(&pool->lock, flags);
  26458. }
  26459. - local_irq_restore(flags);
  26460. + rcu_read_unlock();
  26461. return ret;
  26462. }
  26463. @@ -4310,7 +4350,7 @@
  26464. unsigned long flags;
  26465. int pi;
  26466. - rcu_read_lock_sched();
  26467. + rcu_read_lock();
  26468. pr_info("Showing busy workqueues and worker pools:\n");
  26469. @@ -4361,7 +4401,7 @@
  26470. spin_unlock_irqrestore(&pool->lock, flags);
  26471. }
  26472. - rcu_read_unlock_sched();
  26473. + rcu_read_unlock();
  26474. }
  26475. /*
  26476. @@ -4722,16 +4762,16 @@
  26477. * nr_active is monotonically decreasing. It's safe
  26478. * to peek without lock.
  26479. */
  26480. - rcu_read_lock_sched();
  26481. + rcu_read_lock();
  26482. for_each_pwq(pwq, wq) {
  26483. WARN_ON_ONCE(pwq->nr_active < 0);
  26484. if (pwq->nr_active) {
  26485. busy = true;
  26486. - rcu_read_unlock_sched();
  26487. + rcu_read_unlock();
  26488. goto out_unlock;
  26489. }
  26490. }
  26491. - rcu_read_unlock_sched();
  26492. + rcu_read_unlock();
  26493. }
  26494. out_unlock:
  26495. mutex_unlock(&wq_pool_mutex);
  26496. @@ -4921,7 +4961,8 @@
  26497. const char *delim = "";
  26498. int node, written = 0;
  26499. - rcu_read_lock_sched();
  26500. + get_online_cpus();
  26501. + rcu_read_lock();
  26502. for_each_node(node) {
  26503. written += scnprintf(buf + written, PAGE_SIZE - written,
  26504. "%s%d:%d", delim, node,
  26505. @@ -4929,7 +4970,8 @@
  26506. delim = " ";
  26507. }
  26508. written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
  26509. - rcu_read_unlock_sched();
  26510. + rcu_read_unlock();
  26511. + put_online_cpus();
  26512. return written;
  26513. }
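Taken together, the workqueue.c changes above repeat a small set of moves: sched-RCU readers become plain RCU readers (so they keep working once spinlocks can sleep), the idle_list is additionally guarded by rt_lock_idle_list()/rt_unlock_idle_list(), and every local_irq_save()/local_irq_restore() pair protecting the per-CPU pending state becomes a local lock on pendingb_lock. The last pattern, condensed into a self-contained sketch (claim_pending() is a made-up name; queue_work_on() in the hunks above is the real user):

#include <linux/locallock.h>
#include <linux/workqueue.h>

static DEFINE_LOCAL_IRQ_LOCK(pendingb_lock);	/* same construct as the lock added above */

/* Hypothetical helper showing the shape of the conversion. */
static bool claim_pending(struct work_struct *work)
{
	unsigned long flags;
	bool ret = false;

	local_lock_irqsave(pendingb_lock, flags);	/* was: local_irq_save(flags) */
	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
		ret = true;
	local_unlock_irqrestore(pendingb_lock, flags);	/* was: local_irq_restore(flags) */

	return ret;
}

On a non-RT kernel the local lock maps back to plain IRQ disabling, so behaviour is unchanged; on PREEMPT_RT_FULL it is a per-CPU sleeping lock, which is also why try_to_grab_pending() can retry with cpu_chill() instead of cpu_relax().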
  26514. diff -Nur linux-4.4.62.orig/kernel/workqueue_internal.h linux-4.4.62/kernel/workqueue_internal.h
  26515. --- linux-4.4.62.orig/kernel/workqueue_internal.h 2017-04-18 07:15:37.000000000 +0200
  26516. +++ linux-4.4.62/kernel/workqueue_internal.h 2017-04-18 17:38:08.230650641 +0200
  26517. @@ -43,6 +43,7 @@
  26518. unsigned long last_active; /* L: last active timestamp */
  26519. unsigned int flags; /* X: flags */
  26520. int id; /* I: worker id */
  26521. + int sleeping; /* None */
  26522. /*
  26523. * Opaque string set with work_set_desc(). Printed out with task
  26524. @@ -68,7 +69,7 @@
  26525. * Scheduler hooks for concurrency managed workqueue. Only to be used from
  26526. * sched/core.c and workqueue.c.
  26527. */
  26528. -void wq_worker_waking_up(struct task_struct *task, int cpu);
  26529. -struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu);
  26530. +void wq_worker_running(struct task_struct *task);
  26531. +void wq_worker_sleeping(struct task_struct *task);
  26532. #endif /* _KERNEL_WORKQUEUE_INTERNAL_H */
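The prototype change mirrors the rework in workqueue.c: the scheduler no longer asks for a task to wake up, it merely tells the workqueue code that a worker is going to sleep or is running again, and waking an idle worker happens inside wq_worker_sleeping() itself. The scheduler side is changed earlier in this patch; the calling contract is roughly the sketch below (the helper names here are illustrative, not quoted from that hunk):

/* Simplified calling contract; not the actual kernel/sched/core.c change. */
static inline void notify_worker_sleeping(struct task_struct *tsk)
{
	if (tsk->state && (tsk->flags & PF_WQ_WORKER))
		wq_worker_sleeping(tsk);	/* may wake an idle worker on its own */
}

static inline void notify_worker_running(struct task_struct *tsk)
{
	if (tsk->flags & PF_WQ_WORKER)
		wq_worker_running(tsk);		/* re-arm nr_running accounting after sleep */
}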
  26533. diff -Nur linux-4.4.62.orig/lib/debugobjects.c linux-4.4.62/lib/debugobjects.c
  26534. --- linux-4.4.62.orig/lib/debugobjects.c 2017-04-18 07:15:37.000000000 +0200
  26535. +++ linux-4.4.62/lib/debugobjects.c 2017-04-18 17:38:08.230650641 +0200
  26536. @@ -309,7 +309,10 @@
  26537. struct debug_obj *obj;
  26538. unsigned long flags;
  26539. - fill_pool();
  26540. +#ifdef CONFIG_PREEMPT_RT_FULL
  26541. + if (preempt_count() == 0 && !irqs_disabled())
  26542. +#endif
  26543. + fill_pool();
  26544. db = get_bucket((unsigned long) addr);
  26545. diff -Nur linux-4.4.62.orig/lib/idr.c linux-4.4.62/lib/idr.c
  26546. --- linux-4.4.62.orig/lib/idr.c 2017-04-18 07:15:37.000000000 +0200
  26547. +++ linux-4.4.62/lib/idr.c 2017-04-18 17:38:08.230650641 +0200
  26548. @@ -30,6 +30,7 @@
  26549. #include <linux/idr.h>
  26550. #include <linux/spinlock.h>
  26551. #include <linux/percpu.h>
  26552. +#include <linux/locallock.h>
  26553. #define MAX_IDR_SHIFT (sizeof(int) * 8 - 1)
  26554. #define MAX_IDR_BIT (1U << MAX_IDR_SHIFT)
  26555. @@ -45,6 +46,37 @@
  26556. static DEFINE_PER_CPU(int, idr_preload_cnt);
  26557. static DEFINE_SPINLOCK(simple_ida_lock);
  26558. +#ifdef CONFIG_PREEMPT_RT_FULL
  26559. +static DEFINE_LOCAL_IRQ_LOCK(idr_lock);
  26560. +
  26561. +static inline void idr_preload_lock(void)
  26562. +{
  26563. + local_lock(idr_lock);
  26564. +}
  26565. +
  26566. +static inline void idr_preload_unlock(void)
  26567. +{
  26568. + local_unlock(idr_lock);
  26569. +}
  26570. +
  26571. +void idr_preload_end(void)
  26572. +{
  26573. + idr_preload_unlock();
  26574. +}
  26575. +EXPORT_SYMBOL(idr_preload_end);
  26576. +#else
  26577. +static inline void idr_preload_lock(void)
  26578. +{
  26579. + preempt_disable();
  26580. +}
  26581. +
  26582. +static inline void idr_preload_unlock(void)
  26583. +{
  26584. + preempt_enable();
  26585. +}
  26586. +#endif
  26587. +
  26588. +
  26589. /* the maximum ID which can be allocated given idr->layers */
  26590. static int idr_max(int layers)
  26591. {
  26592. @@ -115,14 +147,14 @@
  26593. * context. See idr_preload() for details.
  26594. */
  26595. if (!in_interrupt()) {
  26596. - preempt_disable();
  26597. + idr_preload_lock();
  26598. new = __this_cpu_read(idr_preload_head);
  26599. if (new) {
  26600. __this_cpu_write(idr_preload_head, new->ary[0]);
  26601. __this_cpu_dec(idr_preload_cnt);
  26602. new->ary[0] = NULL;
  26603. }
  26604. - preempt_enable();
  26605. + idr_preload_unlock();
  26606. if (new)
  26607. return new;
  26608. }
  26609. @@ -366,7 +398,6 @@
  26610. idr_mark_full(pa, id);
  26611. }
  26612. -
  26613. /**
  26614. * idr_preload - preload for idr_alloc()
  26615. * @gfp_mask: allocation mask to use for preloading
  26616. @@ -401,7 +432,7 @@
  26617. WARN_ON_ONCE(in_interrupt());
  26618. might_sleep_if(gfpflags_allow_blocking(gfp_mask));
  26619. - preempt_disable();
  26620. + idr_preload_lock();
  26621. /*
  26622. * idr_alloc() is likely to succeed w/o full idr_layer buffer and
  26623. @@ -413,9 +444,9 @@
  26624. while (__this_cpu_read(idr_preload_cnt) < MAX_IDR_FREE) {
  26625. struct idr_layer *new;
  26626. - preempt_enable();
  26627. + idr_preload_unlock();
  26628. new = kmem_cache_zalloc(idr_layer_cache, gfp_mask);
  26629. - preempt_disable();
  26630. + idr_preload_lock();
  26631. if (!new)
  26632. break;
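For idr users nothing changes at the API level: idr_preload() still brackets an atomic idr_alloc() and idr_preload_end() still closes the section; only the protection of the per-CPU preload cache differs, and on PREEMPT_RT_FULL idr_preload_end() becomes a real exported function because it has to release idr_lock. A typical caller, unchanged by this patch (my_lock and assign_id() are hypothetical, for illustration only):

#include <linux/idr.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(my_lock);	/* hypothetical lock serializing the idr users */

int assign_id(struct idr *idr, void *ptr)
{
	int id;

	idr_preload(GFP_KERNEL);	/* local lock on RT, preempt_disable() otherwise */
	spin_lock(&my_lock);
	id = idr_alloc(idr, ptr, 0, 0, GFP_NOWAIT);	/* negative errno on failure */
	spin_unlock(&my_lock);
	idr_preload_end();		/* releases idr_lock on RT */

	return id;
}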
  26633. diff -Nur linux-4.4.62.orig/lib/Kconfig linux-4.4.62/lib/Kconfig
  26634. --- linux-4.4.62.orig/lib/Kconfig 2017-04-18 07:15:37.000000000 +0200
  26635. +++ linux-4.4.62/lib/Kconfig 2017-04-18 17:38:08.230650641 +0200
  26636. @@ -397,6 +397,7 @@
  26637. config CPUMASK_OFFSTACK
  26638. bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS
  26639. + depends on !PREEMPT_RT_FULL
  26640. help
  26641. Use dynamic allocation for cpumask_var_t, instead of putting
  26642. them on the stack. This is a bit more expensive, but avoids
  26643. diff -Nur linux-4.4.62.orig/lib/locking-selftest.c linux-4.4.62/lib/locking-selftest.c
  26644. --- linux-4.4.62.orig/lib/locking-selftest.c 2017-04-18 07:15:37.000000000 +0200
  26645. +++ linux-4.4.62/lib/locking-selftest.c 2017-04-18 17:38:08.230650641 +0200
  26646. @@ -590,6 +590,8 @@
  26647. #include "locking-selftest-spin-hardirq.h"
  26648. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_spin)
  26649. +#ifndef CONFIG_PREEMPT_RT_FULL
  26650. +
  26651. #include "locking-selftest-rlock-hardirq.h"
  26652. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_rlock)
  26653. @@ -605,9 +607,12 @@
  26654. #include "locking-selftest-wlock-softirq.h"
  26655. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_wlock)
  26656. +#endif
  26657. +
  26658. #undef E1
  26659. #undef E2
  26660. +#ifndef CONFIG_PREEMPT_RT_FULL
  26661. /*
  26662. * Enabling hardirqs with a softirq-safe lock held:
  26663. */
  26664. @@ -640,6 +645,8 @@
  26665. #undef E1
  26666. #undef E2
  26667. +#endif
  26668. +
  26669. /*
  26670. * Enabling irqs with an irq-safe lock held:
  26671. */
  26672. @@ -663,6 +670,8 @@
  26673. #include "locking-selftest-spin-hardirq.h"
  26674. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_spin)
  26675. +#ifndef CONFIG_PREEMPT_RT_FULL
  26676. +
  26677. #include "locking-selftest-rlock-hardirq.h"
  26678. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_rlock)
  26679. @@ -678,6 +687,8 @@
  26680. #include "locking-selftest-wlock-softirq.h"
  26681. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock)
  26682. +#endif
  26683. +
  26684. #undef E1
  26685. #undef E2
  26686. @@ -709,6 +720,8 @@
  26687. #include "locking-selftest-spin-hardirq.h"
  26688. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_spin)
  26689. +#ifndef CONFIG_PREEMPT_RT_FULL
  26690. +
  26691. #include "locking-selftest-rlock-hardirq.h"
  26692. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_rlock)
  26693. @@ -724,6 +737,8 @@
  26694. #include "locking-selftest-wlock-softirq.h"
  26695. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock)
  26696. +#endif
  26697. +
  26698. #undef E1
  26699. #undef E2
  26700. #undef E3
  26701. @@ -757,6 +772,8 @@
  26702. #include "locking-selftest-spin-hardirq.h"
  26703. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_spin)
  26704. +#ifndef CONFIG_PREEMPT_RT_FULL
  26705. +
  26706. #include "locking-selftest-rlock-hardirq.h"
  26707. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_rlock)
  26708. @@ -772,10 +789,14 @@
  26709. #include "locking-selftest-wlock-softirq.h"
  26710. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_wlock)
  26711. +#endif
  26712. +
  26713. #undef E1
  26714. #undef E2
  26715. #undef E3
  26716. +#ifndef CONFIG_PREEMPT_RT_FULL
  26717. +
  26718. /*
  26719. * read-lock / write-lock irq inversion.
  26720. *
  26721. @@ -838,6 +859,10 @@
  26722. #undef E2
  26723. #undef E3
  26724. +#endif
  26725. +
  26726. +#ifndef CONFIG_PREEMPT_RT_FULL
  26727. +
  26728. /*
  26729. * read-lock / write-lock recursion that is actually safe.
  26730. */
  26731. @@ -876,6 +901,8 @@
  26732. #undef E2
  26733. #undef E3
  26734. +#endif
  26735. +
  26736. /*
  26737. * read-lock / write-lock recursion that is unsafe.
  26738. */
  26739. @@ -1858,6 +1885,7 @@
  26740. printk(" --------------------------------------------------------------------------\n");
  26741. +#ifndef CONFIG_PREEMPT_RT_FULL
  26742. /*
  26743. * irq-context testcases:
  26744. */
  26745. @@ -1870,6 +1898,28 @@
  26746. DO_TESTCASE_6x2("irq read-recursion", irq_read_recursion);
  26747. // DO_TESTCASE_6x2B("irq read-recursion #2", irq_read_recursion2);
  26748. +#else
  26749. + /* On -rt, we only do hardirq context test for raw spinlock */
  26750. + DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 12);
  26751. + DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 21);
  26752. +
  26753. + DO_TESTCASE_1B("hard-safe-A + irqs-on", irqsafe2B_hard_spin, 12);
  26754. + DO_TESTCASE_1B("hard-safe-A + irqs-on", irqsafe2B_hard_spin, 21);
  26755. +
  26756. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 123);
  26757. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 132);
  26758. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 213);
  26759. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 231);
  26760. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 312);
  26761. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 321);
  26762. +
  26763. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 123);
  26764. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 132);
  26765. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 213);
  26766. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 231);
  26767. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 312);
  26768. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 321);
  26769. +#endif
  26770. ww_tests();
  26771. diff -Nur linux-4.4.62.orig/lib/percpu_ida.c linux-4.4.62/lib/percpu_ida.c
  26772. --- linux-4.4.62.orig/lib/percpu_ida.c 2017-04-18 07:15:37.000000000 +0200
  26773. +++ linux-4.4.62/lib/percpu_ida.c 2017-04-18 17:38:08.230650641 +0200
  26774. @@ -26,6 +26,9 @@
  26775. #include <linux/string.h>
  26776. #include <linux/spinlock.h>
  26777. #include <linux/percpu_ida.h>
  26778. +#include <linux/locallock.h>
  26779. +
  26780. +static DEFINE_LOCAL_IRQ_LOCK(irq_off_lock);
  26781. struct percpu_ida_cpu {
  26782. /*
  26783. @@ -148,13 +151,13 @@
  26784. unsigned long flags;
  26785. int tag;
  26786. - local_irq_save(flags);
  26787. + local_lock_irqsave(irq_off_lock, flags);
  26788. tags = this_cpu_ptr(pool->tag_cpu);
  26789. /* Fastpath */
  26790. tag = alloc_local_tag(tags);
  26791. if (likely(tag >= 0)) {
  26792. - local_irq_restore(flags);
  26793. + local_unlock_irqrestore(irq_off_lock, flags);
  26794. return tag;
  26795. }
  26796. @@ -173,6 +176,7 @@
  26797. if (!tags->nr_free)
  26798. alloc_global_tags(pool, tags);
  26799. +
  26800. if (!tags->nr_free)
  26801. steal_tags(pool, tags);
  26802. @@ -184,7 +188,7 @@
  26803. }
  26804. spin_unlock(&pool->lock);
  26805. - local_irq_restore(flags);
  26806. + local_unlock_irqrestore(irq_off_lock, flags);
  26807. if (tag >= 0 || state == TASK_RUNNING)
  26808. break;
  26809. @@ -196,7 +200,7 @@
  26810. schedule();
  26811. - local_irq_save(flags);
  26812. + local_lock_irqsave(irq_off_lock, flags);
  26813. tags = this_cpu_ptr(pool->tag_cpu);
  26814. }
  26815. if (state != TASK_RUNNING)
  26816. @@ -221,7 +225,7 @@
  26817. BUG_ON(tag >= pool->nr_tags);
  26818. - local_irq_save(flags);
  26819. + local_lock_irqsave(irq_off_lock, flags);
  26820. tags = this_cpu_ptr(pool->tag_cpu);
  26821. spin_lock(&tags->lock);
  26822. @@ -253,7 +257,7 @@
  26823. spin_unlock(&pool->lock);
  26824. }
  26825. - local_irq_restore(flags);
  26826. + local_unlock_irqrestore(irq_off_lock, flags);
  26827. }
  26828. EXPORT_SYMBOL_GPL(percpu_ida_free);
  26829. @@ -345,7 +349,7 @@
  26830. struct percpu_ida_cpu *remote;
  26831. unsigned cpu, i, err = 0;
  26832. - local_irq_save(flags);
  26833. + local_lock_irqsave(irq_off_lock, flags);
  26834. for_each_possible_cpu(cpu) {
  26835. remote = per_cpu_ptr(pool->tag_cpu, cpu);
  26836. spin_lock(&remote->lock);
  26837. @@ -367,7 +371,7 @@
  26838. }
  26839. spin_unlock(&pool->lock);
  26840. out:
  26841. - local_irq_restore(flags);
  26842. + local_unlock_irqrestore(irq_off_lock, flags);
  26843. return err;
  26844. }
  26845. EXPORT_SYMBOL_GPL(percpu_ida_for_each_free);
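The percpu_ida hunks above show the core -rt conversion used throughout this patch: a local_irq_save()/local_irq_restore() pair that only guards per-CPU state is replaced by a named local lock. On a non-RT build the local lock is expected to behave like the irq-off section it replaces; on PREEMPT_RT it presumably becomes a per-CPU sleeping lock plus migration disable, so the section stays preemptible. A minimal sketch of the pattern, with hypothetical names (my_pcpu_lock, my_counter) and assuming the locallock.h helpers behave as their names suggest:

#include <linux/percpu.h>
#include <linux/locallock.h>

static DEFINE_LOCAL_IRQ_LOCK(my_pcpu_lock);
static DEFINE_PER_CPU(int, my_counter);

static void bump_my_counter(void)
{
	unsigned long flags;

	/* was: local_irq_save(flags); */
	local_lock_irqsave(my_pcpu_lock, flags);
	__this_cpu_inc(my_counter);	/* per-CPU data; no migration possible here */
	/* was: local_irq_restore(flags); */
	local_unlock_irqrestore(my_pcpu_lock, flags);
}

Because the lock has a name, lockdep can see it, and locks taken inside the section (the spin_lock(&pool->lock) above) keep a well-defined ordering against it.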
  26846. diff -Nur linux-4.4.62.orig/lib/radix-tree.c linux-4.4.62/lib/radix-tree.c
  26847. --- linux-4.4.62.orig/lib/radix-tree.c 2017-04-18 07:15:37.000000000 +0200
  26848. +++ linux-4.4.62/lib/radix-tree.c 2017-04-18 17:38:08.230650641 +0200
  26849. @@ -34,7 +34,7 @@
  26850. #include <linux/bitops.h>
  26851. #include <linux/rcupdate.h>
  26852. #include <linux/preempt.h> /* in_interrupt() */
  26853. -
  26854. +#include <linux/locallock.h>
  26855. /*
  26856. * The height_to_maxindex array needs to be one deeper than the maximum
  26857. @@ -69,6 +69,7 @@
  26858. struct radix_tree_node *nodes;
  26859. };
  26860. static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };
  26861. +static DEFINE_LOCAL_IRQ_LOCK(radix_tree_preloads_lock);
  26862. static inline void *ptr_to_indirect(void *ptr)
  26863. {
  26864. @@ -196,13 +197,14 @@
  26865. * succeed in getting a node here (and never reach
  26866. * kmem_cache_alloc)
  26867. */
  26868. - rtp = this_cpu_ptr(&radix_tree_preloads);
  26869. + rtp = &get_locked_var(radix_tree_preloads_lock, radix_tree_preloads);
  26870. if (rtp->nr) {
  26871. ret = rtp->nodes;
  26872. rtp->nodes = ret->private_data;
  26873. ret->private_data = NULL;
  26874. rtp->nr--;
  26875. }
  26876. + put_locked_var(radix_tree_preloads_lock, radix_tree_preloads);
  26877. /*
  26878. * Update the allocation stack trace as this is more useful
  26879. * for debugging.
  26880. @@ -257,14 +259,14 @@
  26881. struct radix_tree_node *node;
  26882. int ret = -ENOMEM;
  26883. - preempt_disable();
  26884. + local_lock(radix_tree_preloads_lock);
  26885. rtp = this_cpu_ptr(&radix_tree_preloads);
  26886. while (rtp->nr < RADIX_TREE_PRELOAD_SIZE) {
  26887. - preempt_enable();
  26888. + local_unlock(radix_tree_preloads_lock);
  26889. node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
  26890. if (node == NULL)
  26891. goto out;
  26892. - preempt_disable();
  26893. + local_lock(radix_tree_preloads_lock);
  26894. rtp = this_cpu_ptr(&radix_tree_preloads);
  26895. if (rtp->nr < RADIX_TREE_PRELOAD_SIZE) {
  26896. node->private_data = rtp->nodes;
  26897. @@ -306,11 +308,17 @@
  26898. if (gfpflags_allow_blocking(gfp_mask))
  26899. return __radix_tree_preload(gfp_mask);
  26900. /* Preloading doesn't help anything with this gfp mask, skip it */
  26901. - preempt_disable();
  26902. + local_lock(radix_tree_preloads_lock);
  26903. return 0;
  26904. }
  26905. EXPORT_SYMBOL(radix_tree_maybe_preload);
  26906. +void radix_tree_preload_end(void)
  26907. +{
  26908. + local_unlock(radix_tree_preloads_lock);
  26909. +}
  26910. +EXPORT_SYMBOL(radix_tree_preload_end);
  26911. +
  26912. /*
  26913. * Return the maximum key which can be store into a
  26914. * radix tree with height HEIGHT.
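The radix-tree change wraps the preload fast path in get_locked_var()/put_locked_var() and turns radix_tree_preload_end() into a real exported function that drops radix_tree_preloads_lock, where mainline simply does preempt_enable() inline. Callers are unchanged; a sketch of the usual call sequence, with mapping/index/item as hypothetical placeholders, just to show where the lock is taken and released:

	if (radix_tree_preload(GFP_KERNEL))	/* takes radix_tree_preloads_lock */
		return -ENOMEM;

	spin_lock_irq(&mapping->tree_lock);
	err = radix_tree_insert(&mapping->page_tree, index, item);
	spin_unlock_irq(&mapping->tree_lock);

	radix_tree_preload_end();		/* now local_unlock(), not preempt_enable() */

Note that radix_tree_maybe_preload() also takes the local lock on its "skip preloading" path, so radix_tree_preload_end() stays balanced for every caller.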
  26915. diff -Nur linux-4.4.62.orig/lib/rbtree.c linux-4.4.62/lib/rbtree.c
  26916. --- linux-4.4.62.orig/lib/rbtree.c 2017-04-18 07:15:37.000000000 +0200
  26917. +++ linux-4.4.62/lib/rbtree.c 2017-04-18 17:38:08.230650641 +0200
  26918. @@ -23,6 +23,7 @@
  26919. #include <linux/rbtree_augmented.h>
  26920. #include <linux/export.h>
  26921. +#include <linux/rcupdate.h>
  26922. /*
  26923. * red-black trees properties: http://en.wikipedia.org/wiki/Rbtree
  26924. @@ -590,3 +591,13 @@
  26925. return rb_left_deepest_node(root->rb_node);
  26926. }
  26927. EXPORT_SYMBOL(rb_first_postorder);
  26928. +
  26929. +void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent,
  26930. + struct rb_node **rb_link)
  26931. +{
  26932. + node->__rb_parent_color = (unsigned long)parent;
  26933. + node->rb_left = node->rb_right = NULL;
  26934. +
  26935. + rcu_assign_pointer(*rb_link, node);
  26936. +}
  26937. +EXPORT_SYMBOL(rb_link_node_rcu);
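rb_link_node_rcu() is the RCU-aware counterpart of rb_link_node(): the node is fully initialised first and then published with rcu_assign_pointer(), so a lockless reader walking the tree under rcu_read_lock() never observes a half-linked node. Writers still serialise against each other with their own lock, and readers must tolerate concurrent rebalancing (typically by retrying or accepting a missed node). A hedged sketch with a hypothetical struct item keyed by an integer:

struct item {
	struct rb_node	node;
	unsigned long	key;
};

/* caller holds the writer-side lock; readers walk under rcu_read_lock() */
static void item_insert(struct rb_root *root, struct item *new)
{
	struct rb_node **link = &root->rb_node, *parent = NULL;

	while (*link) {
		struct item *cur = rb_entry(*link, struct item, node);

		parent = *link;
		link = new->key < cur->key ? &parent->rb_left : &parent->rb_right;
	}
	rb_link_node_rcu(&new->node, parent, link);	/* rcu_assign_pointer() publish */
	rb_insert_color(&new->node, root);
}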
  26938. diff -Nur linux-4.4.62.orig/lib/scatterlist.c linux-4.4.62/lib/scatterlist.c
  26939. --- linux-4.4.62.orig/lib/scatterlist.c 2017-04-18 07:15:37.000000000 +0200
  26940. +++ linux-4.4.62/lib/scatterlist.c 2017-04-18 17:38:08.230650641 +0200
  26941. @@ -620,7 +620,7 @@
  26942. flush_kernel_dcache_page(miter->page);
  26943. if (miter->__flags & SG_MITER_ATOMIC) {
  26944. - WARN_ON_ONCE(preemptible());
  26945. + WARN_ON_ONCE(!pagefault_disabled());
  26946. kunmap_atomic(miter->addr);
  26947. } else
  26948. kunmap(miter->page);
  26949. @@ -664,7 +664,7 @@
  26950. if (!sg_miter_skip(&miter, skip))
  26951. return false;
  26952. - local_irq_save(flags);
  26953. + local_irq_save_nort(flags);
  26954. while (sg_miter_next(&miter) && offset < buflen) {
  26955. unsigned int len;
  26956. @@ -681,7 +681,7 @@
  26957. sg_miter_stop(&miter);
  26958. - local_irq_restore(flags);
  26959. + local_irq_restore_nort(flags);
  26960. return offset;
  26961. }
  26962. EXPORT_SYMBOL(sg_copy_buffer);
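sg_copy_buffer() only disables interrupts to keep the SG_MITER_ATOMIC mapping stable; on PREEMPT_RT that mapping is handled differently (hence the pagefault_disabled() check replacing preemptible()), so a hard irq-off section is unnecessary. The _nort helpers are defined elsewhere in this patch; presumably they reduce to the plain irq operations on non-RT kernels and to near no-ops on RT, roughly along these lines — an assumption, not a quote of the actual definitions:

#ifndef CONFIG_PREEMPT_RT_FULL
# define local_irq_save_nort(flags)	local_irq_save(flags)
# define local_irq_restore_nort(flags)	local_irq_restore(flags)
#else
# define local_irq_save_nort(flags)	local_save_flags(flags)
# define local_irq_restore_nort(flags)	(void)(flags)
#endif

The same substitution appears again below in mm/backing-dev.c.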
  26963. diff -Nur linux-4.4.62.orig/lib/smp_processor_id.c linux-4.4.62/lib/smp_processor_id.c
  26964. --- linux-4.4.62.orig/lib/smp_processor_id.c 2017-04-18 07:15:37.000000000 +0200
  26965. +++ linux-4.4.62/lib/smp_processor_id.c 2017-04-18 17:38:08.230650641 +0200
  26966. @@ -39,8 +39,9 @@
  26967. if (!printk_ratelimit())
  26968. goto out_enable;
  26969. - printk(KERN_ERR "BUG: using %s%s() in preemptible [%08x] code: %s/%d\n",
  26970. - what1, what2, preempt_count() - 1, current->comm, current->pid);
  26971. + printk(KERN_ERR "BUG: using %s%s() in preemptible [%08x %08x] code: %s/%d\n",
  26972. + what1, what2, preempt_count() - 1, __migrate_disabled(current),
  26973. + current->comm, current->pid);
  26974. print_symbol("caller is %s\n", (long)__builtin_return_address(0));
  26975. dump_stack();
  26976. diff -Nur linux-4.4.62.orig/Makefile linux-4.4.62/Makefile
  26977. --- linux-4.4.62.orig/Makefile 2017-04-18 07:15:37.000000000 +0200
  26978. +++ linux-4.4.62/Makefile 2017-04-18 17:38:07.902637922 +0200
  26979. @@ -785,6 +785,9 @@
  26980. # Prohibit date/time macros, which would make the build non-deterministic
  26981. KBUILD_CFLAGS += $(call cc-option,-Werror=date-time)
  26982. +# enforce correct pointer usage
  26983. +KBUILD_CFLAGS += $(call cc-option,-Werror=incompatible-pointer-types)
  26984. +
  26985. # use the deterministic mode of AR if available
  26986. KBUILD_ARFLAGS := $(call ar-option,D)
  26987. diff -Nur linux-4.4.62.orig/mm/backing-dev.c linux-4.4.62/mm/backing-dev.c
  26988. --- linux-4.4.62.orig/mm/backing-dev.c 2017-04-18 07:15:37.000000000 +0200
  26989. +++ linux-4.4.62/mm/backing-dev.c 2017-04-18 17:38:08.230650641 +0200
  26990. @@ -457,9 +457,9 @@
  26991. {
  26992. unsigned long flags;
  26993. - local_irq_save(flags);
  26994. + local_irq_save_nort(flags);
  26995. if (!atomic_dec_and_lock(&congested->refcnt, &cgwb_lock)) {
  26996. - local_irq_restore(flags);
  26997. + local_irq_restore_nort(flags);
  26998. return;
  26999. }
  27000. diff -Nur linux-4.4.62.orig/mm/compaction.c linux-4.4.62/mm/compaction.c
  27001. --- linux-4.4.62.orig/mm/compaction.c 2017-04-18 07:15:37.000000000 +0200
  27002. +++ linux-4.4.62/mm/compaction.c 2017-04-18 17:38:08.230650641 +0200
  27003. @@ -1430,10 +1430,12 @@
  27004. cc->migrate_pfn & ~((1UL << cc->order) - 1);
  27005. if (cc->last_migrated_pfn < current_block_start) {
  27006. - cpu = get_cpu();
  27007. + cpu = get_cpu_light();
  27008. + local_lock_irq(swapvec_lock);
  27009. lru_add_drain_cpu(cpu);
  27010. + local_unlock_irq(swapvec_lock);
  27011. drain_local_pages(zone);
  27012. - put_cpu();
  27013. + put_cpu_light();
  27014. /* No more flushing until we migrate again */
  27015. cc->last_migrated_pfn = 0;
  27016. }
  27017. diff -Nur linux-4.4.62.orig/mm/filemap.c linux-4.4.62/mm/filemap.c
  27018. --- linux-4.4.62.orig/mm/filemap.c 2017-04-18 07:15:37.000000000 +0200
  27019. +++ linux-4.4.62/mm/filemap.c 2017-04-18 17:38:08.230650641 +0200
  27020. @@ -144,9 +144,12 @@
  27021. * node->private_list is protected by
  27022. * mapping->tree_lock.
  27023. */
  27024. - if (!list_empty(&node->private_list))
  27025. - list_lru_del(&workingset_shadow_nodes,
  27026. + if (!list_empty(&node->private_list)) {
  27027. + local_lock(workingset_shadow_lock);
  27028. + list_lru_del(&__workingset_shadow_nodes,
  27029. &node->private_list);
  27030. + local_unlock(workingset_shadow_lock);
  27031. + }
  27032. }
  27033. return 0;
  27034. }
  27035. @@ -218,7 +221,9 @@
  27036. if (!workingset_node_pages(node) &&
  27037. list_empty(&node->private_list)) {
  27038. node->private_data = mapping;
  27039. - list_lru_add(&workingset_shadow_nodes, &node->private_list);
  27040. + local_lock(workingset_shadow_lock);
  27041. + list_lru_add(&__workingset_shadow_nodes, &node->private_list);
  27042. + local_unlock(workingset_shadow_lock);
  27043. }
  27044. }
  27045. diff -Nur linux-4.4.62.orig/mm/highmem.c linux-4.4.62/mm/highmem.c
  27046. --- linux-4.4.62.orig/mm/highmem.c 2017-04-18 07:15:37.000000000 +0200
  27047. +++ linux-4.4.62/mm/highmem.c 2017-04-18 17:38:08.230650641 +0200
  27048. @@ -29,10 +29,11 @@
  27049. #include <linux/kgdb.h>
  27050. #include <asm/tlbflush.h>
  27051. -
  27052. +#ifndef CONFIG_PREEMPT_RT_FULL
  27053. #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32)
  27054. DEFINE_PER_CPU(int, __kmap_atomic_idx);
  27055. #endif
  27056. +#endif
  27057. /*
  27058. * Virtual_count is not a pure "count".
  27059. @@ -107,8 +108,9 @@
  27060. unsigned long totalhigh_pages __read_mostly;
  27061. EXPORT_SYMBOL(totalhigh_pages);
  27062. -
  27063. +#ifndef CONFIG_PREEMPT_RT_FULL
  27064. EXPORT_PER_CPU_SYMBOL(__kmap_atomic_idx);
  27065. +#endif
  27066. unsigned int nr_free_highpages (void)
  27067. {
  27068. diff -Nur linux-4.4.62.orig/mm/Kconfig linux-4.4.62/mm/Kconfig
  27069. --- linux-4.4.62.orig/mm/Kconfig 2017-04-18 07:15:37.000000000 +0200
  27070. +++ linux-4.4.62/mm/Kconfig 2017-04-18 17:38:08.230650641 +0200
  27071. @@ -392,7 +392,7 @@
  27072. config TRANSPARENT_HUGEPAGE
  27073. bool "Transparent Hugepage Support"
  27074. - depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE
  27075. + depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE && !PREEMPT_RT_FULL
  27076. select COMPACTION
  27077. help
  27078. Transparent Hugepages allows the kernel to use huge pages and
  27079. diff -Nur linux-4.4.62.orig/mm/memcontrol.c linux-4.4.62/mm/memcontrol.c
  27080. --- linux-4.4.62.orig/mm/memcontrol.c 2017-04-18 07:15:37.000000000 +0200
  27081. +++ linux-4.4.62/mm/memcontrol.c 2017-04-18 17:38:08.234650796 +0200
  27082. @@ -67,6 +67,8 @@
  27083. #include <net/sock.h>
  27084. #include <net/ip.h>
  27085. #include <net/tcp_memcontrol.h>
  27086. +#include <linux/locallock.h>
  27087. +
  27088. #include "slab.h"
  27089. #include <asm/uaccess.h>
  27090. @@ -87,6 +89,7 @@
  27091. #define do_swap_account 0
  27092. #endif
  27093. +static DEFINE_LOCAL_IRQ_LOCK(event_lock);
  27094. static const char * const mem_cgroup_stat_names[] = {
  27095. "cache",
  27096. "rss",
  27097. @@ -1922,14 +1925,17 @@
  27098. */
  27099. static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
  27100. {
  27101. - struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock);
  27102. + struct memcg_stock_pcp *stock;
  27103. + int cpu = get_cpu_light();
  27104. +
  27105. + stock = &per_cpu(memcg_stock, cpu);
  27106. if (stock->cached != memcg) { /* reset if necessary */
  27107. drain_stock(stock);
  27108. stock->cached = memcg;
  27109. }
  27110. stock->nr_pages += nr_pages;
  27111. - put_cpu_var(memcg_stock);
  27112. + put_cpu_light();
  27113. }
  27114. /*
  27115. @@ -1945,7 +1951,7 @@
  27116. return;
  27117. /* Notify other cpus that system-wide "drain" is running */
  27118. get_online_cpus();
  27119. - curcpu = get_cpu();
  27120. + curcpu = get_cpu_light();
  27121. for_each_online_cpu(cpu) {
  27122. struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
  27123. struct mem_cgroup *memcg;
  27124. @@ -1962,7 +1968,7 @@
  27125. schedule_work_on(cpu, &stock->work);
  27126. }
  27127. }
  27128. - put_cpu();
  27129. + put_cpu_light();
  27130. put_online_cpus();
  27131. mutex_unlock(&percpu_charge_mutex);
  27132. }
  27133. @@ -4691,12 +4697,12 @@
  27134. ret = 0;
  27135. - local_irq_disable();
  27136. + local_lock_irq(event_lock);
  27137. mem_cgroup_charge_statistics(to, page, nr_pages);
  27138. memcg_check_events(to, page);
  27139. mem_cgroup_charge_statistics(from, page, -nr_pages);
  27140. memcg_check_events(from, page);
  27141. - local_irq_enable();
  27142. + local_unlock_irq(event_lock);
  27143. out_unlock:
  27144. unlock_page(page);
  27145. out:
  27146. @@ -5486,10 +5492,10 @@
  27147. VM_BUG_ON_PAGE(!PageTransHuge(page), page);
  27148. }
  27149. - local_irq_disable();
  27150. + local_lock_irq(event_lock);
  27151. mem_cgroup_charge_statistics(memcg, page, nr_pages);
  27152. memcg_check_events(memcg, page);
  27153. - local_irq_enable();
  27154. + local_unlock_irq(event_lock);
  27155. if (do_swap_account && PageSwapCache(page)) {
  27156. swp_entry_t entry = { .val = page_private(page) };
  27157. @@ -5545,14 +5551,14 @@
  27158. memcg_oom_recover(memcg);
  27159. }
  27160. - local_irq_save(flags);
  27161. + local_lock_irqsave(event_lock, flags);
  27162. __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS], nr_anon);
  27163. __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_CACHE], nr_file);
  27164. __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], nr_huge);
  27165. __this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT], pgpgout);
  27166. __this_cpu_add(memcg->stat->nr_page_events, nr_pages);
  27167. memcg_check_events(memcg, dummy_page);
  27168. - local_irq_restore(flags);
  27169. + local_unlock_irqrestore(event_lock, flags);
  27170. if (!mem_cgroup_is_root(memcg))
  27171. css_put_many(&memcg->css, nr_pages);
  27172. @@ -5762,6 +5768,7 @@
  27173. {
  27174. struct mem_cgroup *memcg, *swap_memcg;
  27175. unsigned short oldid;
  27176. + unsigned long flags;
  27177. VM_BUG_ON_PAGE(PageLRU(page), page);
  27178. VM_BUG_ON_PAGE(page_count(page), page);
  27179. @@ -5802,12 +5809,16 @@
  27180. * important here to have the interrupts disabled because it is the
  27181. * only synchronisation we have for udpating the per-CPU variables.
  27182. */
  27183. + local_lock_irqsave(event_lock, flags);
  27184. +#ifndef CONFIG_PREEMPT_RT_BASE
  27185. VM_BUG_ON(!irqs_disabled());
  27186. +#endif
  27187. mem_cgroup_charge_statistics(memcg, page, -1);
  27188. memcg_check_events(memcg, page);
  27189. if (!mem_cgroup_is_root(memcg))
  27190. css_put(&memcg->css);
  27191. + local_unlock_irqrestore(event_lock, flags);
  27192. }
  27193. /**
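memcontrol uses two of the -rt primitives side by side: event_lock (a local IRQ lock) for the statistics and event updates that mainline guards with local_irq_disable(), and get_cpu_light()/put_cpu_light() where mainline uses get_cpu()/put_cpu(). The *_light variants presumably pin the task to its CPU (migrate-disable) without disabling preemption on RT, which is what the per-CPU stock manipulation here relies on. A condensed sketch of the refill_stock() shape above, with hypothetical names (my_stock_pcp, nr):

struct my_stock { unsigned int nr; };
static DEFINE_PER_CPU(struct my_stock, my_stock_pcp);

static void refill_example(unsigned int nr)
{
	struct my_stock *stock;
	int cpu = get_cpu_light();	/* no migration; still preemptible on RT (assumed) */

	stock = &per_cpu(my_stock_pcp, cpu);
	stock->nr += nr;		/* this CPU's entry of the per-CPU data */
	put_cpu_light();
}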
  27194. diff -Nur linux-4.4.62.orig/mm/mmu_context.c linux-4.4.62/mm/mmu_context.c
  27195. --- linux-4.4.62.orig/mm/mmu_context.c 2017-04-18 07:15:37.000000000 +0200
  27196. +++ linux-4.4.62/mm/mmu_context.c 2017-04-18 17:38:08.234650796 +0200
  27197. @@ -23,6 +23,7 @@
  27198. struct task_struct *tsk = current;
  27199. task_lock(tsk);
  27200. + preempt_disable_rt();
  27201. active_mm = tsk->active_mm;
  27202. if (active_mm != mm) {
  27203. atomic_inc(&mm->mm_count);
  27204. @@ -30,6 +31,7 @@
  27205. }
  27206. tsk->mm = mm;
  27207. switch_mm(active_mm, mm, tsk);
  27208. + preempt_enable_rt();
  27209. task_unlock(tsk);
  27210. #ifdef finish_arch_post_lock_switch
  27211. finish_arch_post_lock_switch();
  27212. diff -Nur linux-4.4.62.orig/mm/page_alloc.c linux-4.4.62/mm/page_alloc.c
  27213. --- linux-4.4.62.orig/mm/page_alloc.c 2017-04-18 07:15:37.000000000 +0200
  27214. +++ linux-4.4.62/mm/page_alloc.c 2017-04-18 17:38:08.234650796 +0200
  27215. @@ -60,6 +60,7 @@
  27216. #include <linux/page_ext.h>
  27217. #include <linux/hugetlb.h>
  27218. #include <linux/sched/rt.h>
  27219. +#include <linux/locallock.h>
  27220. #include <linux/page_owner.h>
  27221. #include <linux/kthread.h>
  27222. @@ -264,6 +265,18 @@
  27223. EXPORT_SYMBOL(nr_online_nodes);
  27224. #endif
  27225. +static DEFINE_LOCAL_IRQ_LOCK(pa_lock);
  27226. +
  27227. +#ifdef CONFIG_PREEMPT_RT_BASE
  27228. +# define cpu_lock_irqsave(cpu, flags) \
  27229. + local_lock_irqsave_on(pa_lock, flags, cpu)
  27230. +# define cpu_unlock_irqrestore(cpu, flags) \
  27231. + local_unlock_irqrestore_on(pa_lock, flags, cpu)
  27232. +#else
  27233. +# define cpu_lock_irqsave(cpu, flags) local_irq_save(flags)
  27234. +# define cpu_unlock_irqrestore(cpu, flags) local_irq_restore(flags)
  27235. +#endif
  27236. +
  27237. int page_group_by_mobility_disabled __read_mostly;
  27238. #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
  27239. @@ -786,7 +799,7 @@
  27240. }
  27241. /*
  27242. - * Frees a number of pages from the PCP lists
  27243. + * Frees a number of pages which have been collected from the pcp lists.
  27244. * Assumes all pages on list are in same zone, and of same order.
  27245. * count is the number of pages to free.
  27246. *
  27247. @@ -797,18 +810,53 @@
  27248. * pinned" detection logic.
  27249. */
  27250. static void free_pcppages_bulk(struct zone *zone, int count,
  27251. - struct per_cpu_pages *pcp)
  27252. + struct list_head *list)
  27253. {
  27254. - int migratetype = 0;
  27255. - int batch_free = 0;
  27256. int to_free = count;
  27257. unsigned long nr_scanned;
  27258. + unsigned long flags;
  27259. +
  27260. + spin_lock_irqsave(&zone->lock, flags);
  27261. - spin_lock(&zone->lock);
  27262. nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED);
  27263. if (nr_scanned)
  27264. __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
  27265. + while (!list_empty(list)) {
  27266. + struct page *page = list_first_entry(list, struct page, lru);
  27267. + int mt; /* migratetype of the to-be-freed page */
  27268. +
  27269. + /* must delete as __free_one_page list manipulates */
  27270. + list_del(&page->lru);
  27271. +
  27272. + mt = get_pcppage_migratetype(page);
  27273. + /* MIGRATE_ISOLATE page should not go to pcplists */
  27274. + VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
  27275. + /* Pageblock could have been isolated meanwhile */
  27276. + if (unlikely(has_isolate_pageblock(zone)))
  27277. + mt = get_pageblock_migratetype(page);
  27278. +
  27279. + __free_one_page(page, page_to_pfn(page), zone, 0, mt);
  27280. + trace_mm_page_pcpu_drain(page, 0, mt);
  27281. + to_free--;
  27282. + }
  27283. + WARN_ON(to_free != 0);
  27284. + spin_unlock_irqrestore(&zone->lock, flags);
  27285. +}
  27286. +
  27287. +/*
  27288. + * Moves a number of pages from the PCP lists to free list which
  27289. + * is freed outside of the locked region.
  27290. + *
  27291. + * Assumes all pages on list are in same zone, and of same order.
  27292. + * count is the number of pages to free.
  27293. + */
  27294. +static void isolate_pcp_pages(int to_free, struct per_cpu_pages *src,
  27295. + struct list_head *dst)
  27296. +{
  27297. + int migratetype = 0;
  27298. + int batch_free = 0;
  27299. +
  27300. while (to_free) {
  27301. struct page *page;
  27302. struct list_head *list;
  27303. @@ -824,7 +872,7 @@
  27304. batch_free++;
  27305. if (++migratetype == MIGRATE_PCPTYPES)
  27306. migratetype = 0;
  27307. - list = &pcp->lists[migratetype];
  27308. + list = &src->lists[migratetype];
  27309. } while (list_empty(list));
  27310. /* This is the only non-empty list. Free them all. */
  27311. @@ -832,24 +880,12 @@
  27312. batch_free = to_free;
  27313. do {
  27314. - int mt; /* migratetype of the to-be-freed page */
  27315. -
  27316. - page = list_entry(list->prev, struct page, lru);
  27317. - /* must delete as __free_one_page list manipulates */
  27318. + page = list_last_entry(list, struct page, lru);
  27319. list_del(&page->lru);
  27320. - mt = get_pcppage_migratetype(page);
  27321. - /* MIGRATE_ISOLATE page should not go to pcplists */
  27322. - VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
  27323. - /* Pageblock could have been isolated meanwhile */
  27324. - if (unlikely(has_isolate_pageblock(zone)))
  27325. - mt = get_pageblock_migratetype(page);
  27326. -
  27327. - __free_one_page(page, page_to_pfn(page), zone, 0, mt);
  27328. - trace_mm_page_pcpu_drain(page, 0, mt);
  27329. + list_add(&page->lru, dst);
  27330. } while (--to_free && --batch_free && !list_empty(list));
  27331. }
  27332. - spin_unlock(&zone->lock);
  27333. }
  27334. static void free_one_page(struct zone *zone,
  27335. @@ -858,7 +894,9 @@
  27336. int migratetype)
  27337. {
  27338. unsigned long nr_scanned;
  27339. - spin_lock(&zone->lock);
  27340. + unsigned long flags;
  27341. +
  27342. + spin_lock_irqsave(&zone->lock, flags);
  27343. nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED);
  27344. if (nr_scanned)
  27345. __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
  27346. @@ -868,7 +906,7 @@
  27347. migratetype = get_pfnblock_migratetype(page, pfn);
  27348. }
  27349. __free_one_page(page, pfn, zone, order, migratetype);
  27350. - spin_unlock(&zone->lock);
  27351. + spin_unlock_irqrestore(&zone->lock, flags);
  27352. }
  27353. static int free_tail_pages_check(struct page *head_page, struct page *page)
  27354. @@ -1019,10 +1057,10 @@
  27355. return;
  27356. migratetype = get_pfnblock_migratetype(page, pfn);
  27357. - local_irq_save(flags);
  27358. + local_lock_irqsave(pa_lock, flags);
  27359. __count_vm_events(PGFREE, 1 << order);
  27360. free_one_page(page_zone(page), page, pfn, order, migratetype);
  27361. - local_irq_restore(flags);
  27362. + local_unlock_irqrestore(pa_lock, flags);
  27363. }
  27364. static void __init __free_pages_boot_core(struct page *page,
  27365. @@ -1879,16 +1917,18 @@
  27366. void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
  27367. {
  27368. unsigned long flags;
  27369. + LIST_HEAD(dst);
  27370. int to_drain, batch;
  27371. - local_irq_save(flags);
  27372. + local_lock_irqsave(pa_lock, flags);
  27373. batch = READ_ONCE(pcp->batch);
  27374. to_drain = min(pcp->count, batch);
  27375. if (to_drain > 0) {
  27376. - free_pcppages_bulk(zone, to_drain, pcp);
  27377. + isolate_pcp_pages(to_drain, pcp, &dst);
  27378. pcp->count -= to_drain;
  27379. }
  27380. - local_irq_restore(flags);
  27381. + local_unlock_irqrestore(pa_lock, flags);
  27382. + free_pcppages_bulk(zone, to_drain, &dst);
  27383. }
  27384. #endif
  27385. @@ -1904,16 +1944,21 @@
  27386. unsigned long flags;
  27387. struct per_cpu_pageset *pset;
  27388. struct per_cpu_pages *pcp;
  27389. + LIST_HEAD(dst);
  27390. + int count;
  27391. - local_irq_save(flags);
  27392. + cpu_lock_irqsave(cpu, flags);
  27393. pset = per_cpu_ptr(zone->pageset, cpu);
  27394. pcp = &pset->pcp;
  27395. - if (pcp->count) {
  27396. - free_pcppages_bulk(zone, pcp->count, pcp);
  27397. + count = pcp->count;
  27398. + if (count) {
  27399. + isolate_pcp_pages(count, pcp, &dst);
  27400. pcp->count = 0;
  27401. }
  27402. - local_irq_restore(flags);
  27403. + cpu_unlock_irqrestore(cpu, flags);
  27404. + if (count)
  27405. + free_pcppages_bulk(zone, count, &dst);
  27406. }
  27407. /*
  27408. @@ -1999,8 +2044,17 @@
  27409. else
  27410. cpumask_clear_cpu(cpu, &cpus_with_pcps);
  27411. }
  27412. +#ifndef CONFIG_PREEMPT_RT_BASE
  27413. on_each_cpu_mask(&cpus_with_pcps, (smp_call_func_t) drain_local_pages,
  27414. zone, 1);
  27415. +#else
  27416. + for_each_cpu(cpu, &cpus_with_pcps) {
  27417. + if (zone)
  27418. + drain_pages_zone(cpu, zone);
  27419. + else
  27420. + drain_pages(cpu);
  27421. + }
  27422. +#endif
  27423. }
  27424. #ifdef CONFIG_HIBERNATION
  27425. @@ -2056,7 +2110,7 @@
  27426. migratetype = get_pfnblock_migratetype(page, pfn);
  27427. set_pcppage_migratetype(page, migratetype);
  27428. - local_irq_save(flags);
  27429. + local_lock_irqsave(pa_lock, flags);
  27430. __count_vm_event(PGFREE);
  27431. /*
  27432. @@ -2082,12 +2136,17 @@
  27433. pcp->count++;
  27434. if (pcp->count >= pcp->high) {
  27435. unsigned long batch = READ_ONCE(pcp->batch);
  27436. - free_pcppages_bulk(zone, batch, pcp);
  27437. + LIST_HEAD(dst);
  27438. +
  27439. + isolate_pcp_pages(batch, pcp, &dst);
  27440. pcp->count -= batch;
  27441. + local_unlock_irqrestore(pa_lock, flags);
  27442. + free_pcppages_bulk(zone, batch, &dst);
  27443. + return;
  27444. }
  27445. out:
  27446. - local_irq_restore(flags);
  27447. + local_unlock_irqrestore(pa_lock, flags);
  27448. }
  27449. /*
  27450. @@ -2222,7 +2281,7 @@
  27451. struct per_cpu_pages *pcp;
  27452. struct list_head *list;
  27453. - local_irq_save(flags);
  27454. + local_lock_irqsave(pa_lock, flags);
  27455. pcp = &this_cpu_ptr(zone->pageset)->pcp;
  27456. list = &pcp->lists[migratetype];
  27457. if (list_empty(list)) {
  27458. @@ -2254,7 +2313,7 @@
  27459. */
  27460. WARN_ON_ONCE(order > 1);
  27461. }
  27462. - spin_lock_irqsave(&zone->lock, flags);
  27463. + local_spin_lock_irqsave(pa_lock, &zone->lock, flags);
  27464. page = NULL;
  27465. if (alloc_flags & ALLOC_HARDER) {
  27466. @@ -2264,11 +2323,13 @@
  27467. }
  27468. if (!page)
  27469. page = __rmqueue(zone, order, migratetype, gfp_flags);
  27470. - spin_unlock(&zone->lock);
  27471. - if (!page)
  27472. + if (!page) {
  27473. + spin_unlock(&zone->lock);
  27474. goto failed;
  27475. + }
  27476. __mod_zone_freepage_state(zone, -(1 << order),
  27477. get_pcppage_migratetype(page));
  27478. + spin_unlock(&zone->lock);
  27479. }
  27480. __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
  27481. @@ -2278,13 +2339,13 @@
  27482. __count_zone_vm_events(PGALLOC, zone, 1 << order);
  27483. zone_statistics(preferred_zone, zone, gfp_flags);
  27484. - local_irq_restore(flags);
  27485. + local_unlock_irqrestore(pa_lock, flags);
  27486. VM_BUG_ON_PAGE(bad_range(zone, page), page);
  27487. return page;
  27488. failed:
  27489. - local_irq_restore(flags);
  27490. + local_unlock_irqrestore(pa_lock, flags);
  27491. return NULL;
  27492. }
  27493. @@ -5953,6 +6014,7 @@
  27494. void __init page_alloc_init(void)
  27495. {
  27496. hotcpu_notifier(page_alloc_cpu_notify, 0);
  27497. + local_irq_lock_init(pa_lock);
  27498. }
  27499. /*
  27500. @@ -6847,7 +6909,7 @@
  27501. struct per_cpu_pageset *pset;
  27502. /* avoid races with drain_pages() */
  27503. - local_irq_save(flags);
  27504. + local_lock_irqsave(pa_lock, flags);
  27505. if (zone->pageset != &boot_pageset) {
  27506. for_each_online_cpu(cpu) {
  27507. pset = per_cpu_ptr(zone->pageset, cpu);
  27508. @@ -6856,7 +6918,7 @@
  27509. free_percpu(zone->pageset);
  27510. zone->pageset = &boot_pageset;
  27511. }
  27512. - local_irq_restore(flags);
  27513. + local_unlock_irqrestore(pa_lock, flags);
  27514. }
  27515. #ifdef CONFIG_MEMORY_HOTREMOVE
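The largest rework in this section is in the page allocator: free_pcppages_bulk() used to hand pages to the buddy allocator while holding both the irq-off section and zone->lock. It is now split in two, so the irq/preempt-off window (pa_lock) only covers detaching the pages via isolate_pcp_pages(), and the buddy work runs afterwards under zone->lock with interrupts enabled. A condensed restatement of the drain_pages_zone() shape above:

static void drain_example(struct zone *zone, struct per_cpu_pages *pcp)
{
	unsigned long flags;
	LIST_HEAD(dst);
	int count;

	local_lock_irqsave(pa_lock, flags);	/* short: just unlink the pages */
	count = pcp->count;
	if (count) {
		isolate_pcp_pages(count, pcp, &dst);
		pcp->count = 0;
	}
	local_unlock_irqrestore(pa_lock, flags);

	if (count)				/* long: return them to the buddy lists */
		free_pcppages_bulk(zone, count, &dst);
}

The cpu_lock_irqsave()/cpu_unlock_irqrestore() macros exist because on RT another CPU's local lock can be taken directly (local_lock_irqsave_on), whereas mainline keeps the plain local_irq_save() and relies on running on the target CPU.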
  27516. diff -Nur linux-4.4.62.orig/mm/percpu.c linux-4.4.62/mm/percpu.c
  27517. --- linux-4.4.62.orig/mm/percpu.c 2017-04-18 07:15:37.000000000 +0200
  27518. +++ linux-4.4.62/mm/percpu.c 2017-04-18 17:38:08.234650796 +0200
  27519. @@ -1285,18 +1285,7 @@
  27520. }
  27521. EXPORT_SYMBOL_GPL(free_percpu);
  27522. -/**
  27523. - * is_kernel_percpu_address - test whether address is from static percpu area
  27524. - * @addr: address to test
  27525. - *
  27526. - * Test whether @addr belongs to in-kernel static percpu area. Module
  27527. - * static percpu areas are not considered. For those, use
  27528. - * is_module_percpu_address().
  27529. - *
  27530. - * RETURNS:
  27531. - * %true if @addr is from in-kernel static percpu area, %false otherwise.
  27532. - */
  27533. -bool is_kernel_percpu_address(unsigned long addr)
  27534. +bool __is_kernel_percpu_address(unsigned long addr, unsigned long *can_addr)
  27535. {
  27536. #ifdef CONFIG_SMP
  27537. const size_t static_size = __per_cpu_end - __per_cpu_start;
  27538. @@ -1305,16 +1294,36 @@
  27539. for_each_possible_cpu(cpu) {
  27540. void *start = per_cpu_ptr(base, cpu);
  27541. + void *va = (void *)addr;
  27542. - if ((void *)addr >= start && (void *)addr < start + static_size)
  27543. + if (va >= start && va < start + static_size) {
  27544. + if (can_addr)
  27545. + *can_addr = (unsigned long) (va - start);
  27546. return true;
  27547. - }
  27548. + }
  27549. + }
  27550. #endif
  27551. /* on UP, can't distinguish from other static vars, always false */
  27552. return false;
  27553. }
  27554. /**
  27555. + * is_kernel_percpu_address - test whether address is from static percpu area
  27556. + * @addr: address to test
  27557. + *
  27558. + * Test whether @addr belongs to in-kernel static percpu area. Module
  27559. + * static percpu areas are not considered. For those, use
  27560. + * is_module_percpu_address().
  27561. + *
  27562. + * RETURNS:
  27563. + * %true if @addr is from in-kernel static percpu area, %false otherwise.
  27564. + */
  27565. +bool is_kernel_percpu_address(unsigned long addr)
  27566. +{
  27567. + return __is_kernel_percpu_address(addr, NULL);
  27568. +}
  27569. +
  27570. +/**
  27571. * per_cpu_ptr_to_phys - convert translated percpu address to physical address
  27572. * @addr: the address to be converted to physical address
  27573. *
  27574. diff -Nur linux-4.4.62.orig/mm/slab.h linux-4.4.62/mm/slab.h
  27575. --- linux-4.4.62.orig/mm/slab.h 2017-04-18 07:15:37.000000000 +0200
  27576. +++ linux-4.4.62/mm/slab.h 2017-04-18 17:38:08.234650796 +0200
  27577. @@ -324,7 +324,11 @@
  27578. * The slab lists for all objects.
  27579. */
  27580. struct kmem_cache_node {
  27581. +#ifdef CONFIG_SLUB
  27582. + raw_spinlock_t list_lock;
  27583. +#else
  27584. spinlock_t list_lock;
  27585. +#endif
  27586. #ifdef CONFIG_SLAB
  27587. struct list_head slabs_partial; /* partial list first, better asm code */
  27588. diff -Nur linux-4.4.62.orig/mm/slub.c linux-4.4.62/mm/slub.c
  27589. --- linux-4.4.62.orig/mm/slub.c 2017-04-18 07:15:37.000000000 +0200
  27590. +++ linux-4.4.62/mm/slub.c 2017-04-18 17:38:08.234650796 +0200
  27591. @@ -1075,7 +1075,7 @@
  27592. void *object = head;
  27593. int cnt = 0;
  27594. - spin_lock_irqsave(&n->list_lock, *flags);
  27595. + raw_spin_lock_irqsave(&n->list_lock, *flags);
  27596. slab_lock(page);
  27597. if (!check_slab(s, page))
  27598. @@ -1136,7 +1136,7 @@
  27599. fail:
  27600. slab_unlock(page);
  27601. - spin_unlock_irqrestore(&n->list_lock, *flags);
  27602. + raw_spin_unlock_irqrestore(&n->list_lock, *flags);
  27603. slab_fix(s, "Object at 0x%p not freed", object);
  27604. return NULL;
  27605. }
  27606. @@ -1263,6 +1263,12 @@
  27607. #endif /* CONFIG_SLUB_DEBUG */
  27608. +struct slub_free_list {
  27609. + raw_spinlock_t lock;
  27610. + struct list_head list;
  27611. +};
  27612. +static DEFINE_PER_CPU(struct slub_free_list, slub_free_list);
  27613. +
  27614. /*
  27615. * Hooks for other subsystems that check memory allocations. In a typical
  27616. * production configuration these hooks all should produce no code at all.
  27617. @@ -1399,10 +1405,17 @@
  27618. gfp_t alloc_gfp;
  27619. void *start, *p;
  27620. int idx, order;
  27621. + bool enableirqs = false;
  27622. flags &= gfp_allowed_mask;
  27623. if (gfpflags_allow_blocking(flags))
  27624. + enableirqs = true;
  27625. +#ifdef CONFIG_PREEMPT_RT_FULL
  27626. + if (system_state == SYSTEM_RUNNING)
  27627. + enableirqs = true;
  27628. +#endif
  27629. + if (enableirqs)
  27630. local_irq_enable();
  27631. flags |= s->allocflags;
  27632. @@ -1473,7 +1486,7 @@
  27633. page->frozen = 1;
  27634. out:
  27635. - if (gfpflags_allow_blocking(flags))
  27636. + if (enableirqs)
  27637. local_irq_disable();
  27638. if (!page)
  27639. return NULL;
  27640. @@ -1529,6 +1542,16 @@
  27641. __free_kmem_pages(page, order);
  27642. }
  27643. +static void free_delayed(struct list_head *h)
  27644. +{
  27645. + while(!list_empty(h)) {
  27646. + struct page *page = list_first_entry(h, struct page, lru);
  27647. +
  27648. + list_del(&page->lru);
  27649. + __free_slab(page->slab_cache, page);
  27650. + }
  27651. +}
  27652. +
  27653. #define need_reserve_slab_rcu \
  27654. (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head))
  27655. @@ -1560,6 +1583,12 @@
  27656. }
  27657. call_rcu(head, rcu_free_slab);
  27658. + } else if (irqs_disabled()) {
  27659. + struct slub_free_list *f = this_cpu_ptr(&slub_free_list);
  27660. +
  27661. + raw_spin_lock(&f->lock);
  27662. + list_add(&page->lru, &f->list);
  27663. + raw_spin_unlock(&f->lock);
  27664. } else
  27665. __free_slab(s, page);
  27666. }
  27667. @@ -1673,7 +1702,7 @@
  27668. if (!n || !n->nr_partial)
  27669. return NULL;
  27670. - spin_lock(&n->list_lock);
  27671. + raw_spin_lock(&n->list_lock);
  27672. list_for_each_entry_safe(page, page2, &n->partial, lru) {
  27673. void *t;
  27674. @@ -1698,7 +1727,7 @@
  27675. break;
  27676. }
  27677. - spin_unlock(&n->list_lock);
  27678. + raw_spin_unlock(&n->list_lock);
  27679. return object;
  27680. }
  27681. @@ -1944,7 +1973,7 @@
  27682. * that acquire_slab() will see a slab page that
  27683. * is frozen
  27684. */
  27685. - spin_lock(&n->list_lock);
  27686. + raw_spin_lock(&n->list_lock);
  27687. }
  27688. } else {
  27689. m = M_FULL;
  27690. @@ -1955,7 +1984,7 @@
  27691. * slabs from diagnostic functions will not see
  27692. * any frozen slabs.
  27693. */
  27694. - spin_lock(&n->list_lock);
  27695. + raw_spin_lock(&n->list_lock);
  27696. }
  27697. }
  27698. @@ -1990,7 +2019,7 @@
  27699. goto redo;
  27700. if (lock)
  27701. - spin_unlock(&n->list_lock);
  27702. + raw_spin_unlock(&n->list_lock);
  27703. if (m == M_FREE) {
  27704. stat(s, DEACTIVATE_EMPTY);
  27705. @@ -2022,10 +2051,10 @@
  27706. n2 = get_node(s, page_to_nid(page));
  27707. if (n != n2) {
  27708. if (n)
  27709. - spin_unlock(&n->list_lock);
  27710. + raw_spin_unlock(&n->list_lock);
  27711. n = n2;
  27712. - spin_lock(&n->list_lock);
  27713. + raw_spin_lock(&n->list_lock);
  27714. }
  27715. do {
  27716. @@ -2054,7 +2083,7 @@
  27717. }
  27718. if (n)
  27719. - spin_unlock(&n->list_lock);
  27720. + raw_spin_unlock(&n->list_lock);
  27721. while (discard_page) {
  27722. page = discard_page;
  27723. @@ -2093,14 +2122,21 @@
  27724. pobjects = oldpage->pobjects;
  27725. pages = oldpage->pages;
  27726. if (drain && pobjects > s->cpu_partial) {
  27727. + struct slub_free_list *f;
  27728. unsigned long flags;
  27729. + LIST_HEAD(tofree);
  27730. /*
  27731. * partial array is full. Move the existing
  27732. * set to the per node partial list.
  27733. */
  27734. local_irq_save(flags);
  27735. unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
  27736. + f = this_cpu_ptr(&slub_free_list);
  27737. + raw_spin_lock(&f->lock);
  27738. + list_splice_init(&f->list, &tofree);
  27739. + raw_spin_unlock(&f->lock);
  27740. local_irq_restore(flags);
  27741. + free_delayed(&tofree);
  27742. oldpage = NULL;
  27743. pobjects = 0;
  27744. pages = 0;
  27745. @@ -2172,7 +2208,22 @@
  27746. static void flush_all(struct kmem_cache *s)
  27747. {
  27748. + LIST_HEAD(tofree);
  27749. + int cpu;
  27750. +
  27751. on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
  27752. + for_each_online_cpu(cpu) {
  27753. + struct slub_free_list *f;
  27754. +
  27755. + if (!has_cpu_slab(cpu, s))
  27756. + continue;
  27757. +
  27758. + f = &per_cpu(slub_free_list, cpu);
  27759. + raw_spin_lock_irq(&f->lock);
  27760. + list_splice_init(&f->list, &tofree);
  27761. + raw_spin_unlock_irq(&f->lock);
  27762. + free_delayed(&tofree);
  27763. + }
  27764. }
  27765. /*
  27766. @@ -2208,10 +2259,10 @@
  27767. unsigned long x = 0;
  27768. struct page *page;
  27769. - spin_lock_irqsave(&n->list_lock, flags);
  27770. + raw_spin_lock_irqsave(&n->list_lock, flags);
  27771. list_for_each_entry(page, &n->partial, lru)
  27772. x += get_count(page);
  27773. - spin_unlock_irqrestore(&n->list_lock, flags);
  27774. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  27775. return x;
  27776. }
  27777. #endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */
  27778. @@ -2349,8 +2400,10 @@
  27779. * already disabled (which is the case for bulk allocation).
  27780. */
  27781. static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
  27782. - unsigned long addr, struct kmem_cache_cpu *c)
  27783. + unsigned long addr, struct kmem_cache_cpu *c,
  27784. + struct list_head *to_free)
  27785. {
  27786. + struct slub_free_list *f;
  27787. void *freelist;
  27788. struct page *page;
  27789. @@ -2410,6 +2463,13 @@
  27790. VM_BUG_ON(!c->page->frozen);
  27791. c->freelist = get_freepointer(s, freelist);
  27792. c->tid = next_tid(c->tid);
  27793. +
  27794. +out:
  27795. + f = this_cpu_ptr(&slub_free_list);
  27796. + raw_spin_lock(&f->lock);
  27797. + list_splice_init(&f->list, to_free);
  27798. + raw_spin_unlock(&f->lock);
  27799. +
  27800. return freelist;
  27801. new_slab:
  27802. @@ -2441,7 +2501,7 @@
  27803. deactivate_slab(s, page, get_freepointer(s, freelist));
  27804. c->page = NULL;
  27805. c->freelist = NULL;
  27806. - return freelist;
  27807. + goto out;
  27808. }
  27809. /*
  27810. @@ -2453,6 +2513,7 @@
  27811. {
  27812. void *p;
  27813. unsigned long flags;
  27814. + LIST_HEAD(tofree);
  27815. local_irq_save(flags);
  27816. #ifdef CONFIG_PREEMPT
  27817. @@ -2464,8 +2525,9 @@
  27818. c = this_cpu_ptr(s->cpu_slab);
  27819. #endif
  27820. - p = ___slab_alloc(s, gfpflags, node, addr, c);
  27821. + p = ___slab_alloc(s, gfpflags, node, addr, c, &tofree);
  27822. local_irq_restore(flags);
  27823. + free_delayed(&tofree);
  27824. return p;
  27825. }
  27826. @@ -2652,7 +2714,7 @@
  27827. do {
  27828. if (unlikely(n)) {
  27829. - spin_unlock_irqrestore(&n->list_lock, flags);
  27830. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  27831. n = NULL;
  27832. }
  27833. prior = page->freelist;
  27834. @@ -2684,7 +2746,7 @@
  27835. * Otherwise the list_lock will synchronize with
  27836. * other processors updating the list of slabs.
  27837. */
  27838. - spin_lock_irqsave(&n->list_lock, flags);
  27839. + raw_spin_lock_irqsave(&n->list_lock, flags);
  27840. }
  27841. }
  27842. @@ -2726,7 +2788,7 @@
  27843. add_partial(n, page, DEACTIVATE_TO_TAIL);
  27844. stat(s, FREE_ADD_PARTIAL);
  27845. }
  27846. - spin_unlock_irqrestore(&n->list_lock, flags);
  27847. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  27848. return;
  27849. slab_empty:
  27850. @@ -2741,7 +2803,7 @@
  27851. remove_full(s, n, page);
  27852. }
  27853. - spin_unlock_irqrestore(&n->list_lock, flags);
  27854. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  27855. stat(s, FREE_SLAB);
  27856. discard_slab(s, page);
  27857. }
  27858. @@ -2913,6 +2975,7 @@
  27859. void **p)
  27860. {
  27861. struct kmem_cache_cpu *c;
  27862. + LIST_HEAD(to_free);
  27863. int i;
  27864. /* memcg and kmem_cache debug support */
  27865. @@ -2936,7 +2999,7 @@
  27866. * of re-populating per CPU c->freelist
  27867. */
  27868. p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
  27869. - _RET_IP_, c);
  27870. + _RET_IP_, c, &to_free);
  27871. if (unlikely(!p[i]))
  27872. goto error;
  27873. @@ -2948,6 +3011,7 @@
  27874. }
  27875. c->tid = next_tid(c->tid);
  27876. local_irq_enable();
  27877. + free_delayed(&to_free);
  27878. /* Clear memory outside IRQ disabled fastpath loop */
  27879. if (unlikely(flags & __GFP_ZERO)) {
  27880. @@ -3095,7 +3159,7 @@
  27881. init_kmem_cache_node(struct kmem_cache_node *n)
  27882. {
  27883. n->nr_partial = 0;
  27884. - spin_lock_init(&n->list_lock);
  27885. + raw_spin_lock_init(&n->list_lock);
  27886. INIT_LIST_HEAD(&n->partial);
  27887. #ifdef CONFIG_SLUB_DEBUG
  27888. atomic_long_set(&n->nr_slabs, 0);
  27889. @@ -3677,7 +3741,7 @@
  27890. for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
  27891. INIT_LIST_HEAD(promote + i);
  27892. - spin_lock_irqsave(&n->list_lock, flags);
  27893. + raw_spin_lock_irqsave(&n->list_lock, flags);
  27894. /*
  27895. * Build lists of slabs to discard or promote.
  27896. @@ -3708,7 +3772,7 @@
  27897. for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--)
  27898. list_splice(promote + i, &n->partial);
  27899. - spin_unlock_irqrestore(&n->list_lock, flags);
  27900. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  27901. /* Release empty slabs */
  27902. list_for_each_entry_safe(page, t, &discard, lru)
  27903. @@ -3884,6 +3948,12 @@
  27904. {
  27905. static __initdata struct kmem_cache boot_kmem_cache,
  27906. boot_kmem_cache_node;
  27907. + int cpu;
  27908. +
  27909. + for_each_possible_cpu(cpu) {
  27910. + raw_spin_lock_init(&per_cpu(slub_free_list, cpu).lock);
  27911. + INIT_LIST_HEAD(&per_cpu(slub_free_list, cpu).list);
  27912. + }
  27913. if (debug_guardpage_minorder())
  27914. slub_max_order = 0;
  27915. @@ -4127,7 +4197,7 @@
  27916. struct page *page;
  27917. unsigned long flags;
  27918. - spin_lock_irqsave(&n->list_lock, flags);
  27919. + raw_spin_lock_irqsave(&n->list_lock, flags);
  27920. list_for_each_entry(page, &n->partial, lru) {
  27921. validate_slab_slab(s, page, map);
  27922. @@ -4149,7 +4219,7 @@
  27923. s->name, count, atomic_long_read(&n->nr_slabs));
  27924. out:
  27925. - spin_unlock_irqrestore(&n->list_lock, flags);
  27926. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  27927. return count;
  27928. }
  27929. @@ -4337,12 +4407,12 @@
  27930. if (!atomic_long_read(&n->nr_slabs))
  27931. continue;
  27932. - spin_lock_irqsave(&n->list_lock, flags);
  27933. + raw_spin_lock_irqsave(&n->list_lock, flags);
  27934. list_for_each_entry(page, &n->partial, lru)
  27935. process_slab(&t, s, page, alloc, map);
  27936. list_for_each_entry(page, &n->full, lru)
  27937. process_slab(&t, s, page, alloc, map);
  27938. - spin_unlock_irqrestore(&n->list_lock, flags);
  27939. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  27940. }
  27941. for (i = 0; i < t.count; i++) {
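Two related SLUB changes: kmem_cache_node->list_lock becomes a raw_spinlock_t (mm/slab.h above), because it is taken deep inside allocation and free paths that must stay non-sleeping even on RT, and __free_slab() calls that would otherwise run with interrupts disabled are deferred onto a per-CPU slub_free_list and flushed by free_delayed() once interrupts are enabled again (see ___slab_alloc(), flush_all() and put_cpu_partial()). The deferral presumably exists because returning pages to the buddy allocator can take sleeping locks on RT. A condensed sketch of the decision made in free_slab():

static void queue_or_free(struct kmem_cache *s, struct page *page)
{
	if (irqs_disabled()) {
		/* defer: freeing to the buddy may need to sleep on RT (assumed) */
		struct slub_free_list *f = this_cpu_ptr(&slub_free_list);

		raw_spin_lock(&f->lock);
		list_add(&page->lru, &f->list);	/* freed later by free_delayed() */
		raw_spin_unlock(&f->lock);
	} else {
		__free_slab(s, page);
	}
}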
  27942. diff -Nur linux-4.4.62.orig/mm/swap.c linux-4.4.62/mm/swap.c
  27943. --- linux-4.4.62.orig/mm/swap.c 2017-04-18 07:15:37.000000000 +0200
  27944. +++ linux-4.4.62/mm/swap.c 2017-04-18 17:38:08.234650796 +0200
  27945. @@ -31,6 +31,7 @@
  27946. #include <linux/memcontrol.h>
  27947. #include <linux/gfp.h>
  27948. #include <linux/uio.h>
  27949. +#include <linux/locallock.h>
  27950. #include <linux/hugetlb.h>
  27951. #include <linux/page_idle.h>
  27952. @@ -46,6 +47,9 @@
  27953. static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
  27954. static DEFINE_PER_CPU(struct pagevec, lru_deactivate_file_pvecs);
  27955. +static DEFINE_LOCAL_IRQ_LOCK(rotate_lock);
  27956. +DEFINE_LOCAL_IRQ_LOCK(swapvec_lock);
  27957. +
  27958. /*
  27959. * This path almost never happens for VM activity - pages are normally
  27960. * freed via pagevecs. But it gets used by networking.
  27961. @@ -481,11 +485,11 @@
  27962. unsigned long flags;
  27963. page_cache_get(page);
  27964. - local_irq_save(flags);
  27965. + local_lock_irqsave(rotate_lock, flags);
  27966. pvec = this_cpu_ptr(&lru_rotate_pvecs);
  27967. if (!pagevec_add(pvec, page))
  27968. pagevec_move_tail(pvec);
  27969. - local_irq_restore(flags);
  27970. + local_unlock_irqrestore(rotate_lock, flags);
  27971. }
  27972. }
  27973. @@ -536,12 +540,13 @@
  27974. void activate_page(struct page *page)
  27975. {
  27976. if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
  27977. - struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
  27978. + struct pagevec *pvec = &get_locked_var(swapvec_lock,
  27979. + activate_page_pvecs);
  27980. page_cache_get(page);
  27981. if (!pagevec_add(pvec, page))
  27982. pagevec_lru_move_fn(pvec, __activate_page, NULL);
  27983. - put_cpu_var(activate_page_pvecs);
  27984. + put_locked_var(swapvec_lock, activate_page_pvecs);
  27985. }
  27986. }
  27987. @@ -567,7 +572,7 @@
  27988. static void __lru_cache_activate_page(struct page *page)
  27989. {
  27990. - struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
  27991. + struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec);
  27992. int i;
  27993. /*
  27994. @@ -589,7 +594,7 @@
  27995. }
  27996. }
  27997. - put_cpu_var(lru_add_pvec);
  27998. + put_locked_var(swapvec_lock, lru_add_pvec);
  27999. }
  28000. /*
  28001. @@ -630,13 +635,13 @@
  28002. static void __lru_cache_add(struct page *page)
  28003. {
  28004. - struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
  28005. + struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec);
  28006. page_cache_get(page);
  28007. if (!pagevec_space(pvec))
  28008. __pagevec_lru_add(pvec);
  28009. pagevec_add(pvec, page);
  28010. - put_cpu_var(lru_add_pvec);
  28011. + put_locked_var(swapvec_lock, lru_add_pvec);
  28012. }
  28013. /**
  28014. @@ -816,9 +821,15 @@
  28015. unsigned long flags;
  28016. /* No harm done if a racing interrupt already did this */
  28017. - local_irq_save(flags);
  28018. +#ifdef CONFIG_PREEMPT_RT_BASE
  28019. + local_lock_irqsave_on(rotate_lock, flags, cpu);
  28020. + pagevec_move_tail(pvec);
  28021. + local_unlock_irqrestore_on(rotate_lock, flags, cpu);
  28022. +#else
  28023. + local_lock_irqsave(rotate_lock, flags);
  28024. pagevec_move_tail(pvec);
  28025. - local_irq_restore(flags);
  28026. + local_unlock_irqrestore(rotate_lock, flags);
  28027. +#endif
  28028. }
  28029. pvec = &per_cpu(lru_deactivate_file_pvecs, cpu);
  28030. @@ -846,26 +857,47 @@
  28031. return;
  28032. if (likely(get_page_unless_zero(page))) {
  28033. - struct pagevec *pvec = &get_cpu_var(lru_deactivate_file_pvecs);
  28034. + struct pagevec *pvec = &get_locked_var(swapvec_lock,
  28035. + lru_deactivate_file_pvecs);
  28036. if (!pagevec_add(pvec, page))
  28037. pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
  28038. - put_cpu_var(lru_deactivate_file_pvecs);
  28039. + put_locked_var(swapvec_lock, lru_deactivate_file_pvecs);
  28040. }
  28041. }
  28042. void lru_add_drain(void)
  28043. {
  28044. - lru_add_drain_cpu(get_cpu());
  28045. - put_cpu();
  28046. + lru_add_drain_cpu(local_lock_cpu(swapvec_lock));
  28047. + local_unlock_cpu(swapvec_lock);
  28048. }
  28049. +
  28050. +#ifdef CONFIG_PREEMPT_RT_BASE
  28051. +static inline void remote_lru_add_drain(int cpu, struct cpumask *has_work)
  28052. +{
  28053. + local_lock_on(swapvec_lock, cpu);
  28054. + lru_add_drain_cpu(cpu);
  28055. + local_unlock_on(swapvec_lock, cpu);
  28056. +}
  28057. +
  28058. +#else
  28059. +
  28060. static void lru_add_drain_per_cpu(struct work_struct *dummy)
  28061. {
  28062. lru_add_drain();
  28063. }
  28064. static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
  28065. +static inline void remote_lru_add_drain(int cpu, struct cpumask *has_work)
  28066. +{
  28067. + struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
  28068. +
  28069. + INIT_WORK(work, lru_add_drain_per_cpu);
  28070. + schedule_work_on(cpu, work);
  28071. + cpumask_set_cpu(cpu, has_work);
  28072. +}
  28073. +#endif
  28074. void lru_add_drain_all(void)
  28075. {
  28076. @@ -878,20 +910,17 @@
  28077. cpumask_clear(&has_work);
  28078. for_each_online_cpu(cpu) {
  28079. - struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
  28080. -
  28081. if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||
  28082. pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||
  28083. pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) ||
  28084. - need_activate_page_drain(cpu)) {
  28085. - INIT_WORK(work, lru_add_drain_per_cpu);
  28086. - schedule_work_on(cpu, work);
  28087. - cpumask_set_cpu(cpu, &has_work);
  28088. - }
  28089. + need_activate_page_drain(cpu))
  28090. + remote_lru_add_drain(cpu, &has_work);
  28091. }
  28092. +#ifndef CONFIG_PREEMPT_RT_BASE
  28093. for_each_cpu(cpu, &has_work)
  28094. flush_work(&per_cpu(lru_add_drain_work, cpu));
  28095. +#endif
  28096. put_online_cpus();
  28097. mutex_unlock(&lock);
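mm/swap.c introduces swapvec_lock and rotate_lock so the pagevec caches are covered by named local locks instead of bare get_cpu_var()/local_irq_save() sections. That also changes how remote CPUs are drained: on RT, lru_add_drain_all() no longer schedules a worker on every CPU; remote_lru_add_drain() takes the target CPU's swapvec_lock with local_lock_on() and drains it from the calling CPU. The caller-side pattern, mirroring __lru_cache_add() above with hypothetical names (my_pvec, my_pvec_lock):

static DEFINE_PER_CPU(struct pagevec, my_pvec);
static DEFINE_LOCAL_IRQ_LOCK(my_pvec_lock);

static void my_cache_add(struct page *page)
{
	struct pagevec *pvec = &get_locked_var(my_pvec_lock, my_pvec);

	page_cache_get(page);
	if (!pagevec_space(pvec))
		__pagevec_lru_add(pvec);	/* flush when full */
	pagevec_add(pvec, page);
	put_locked_var(my_pvec_lock, my_pvec);
}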
  28098. diff -Nur linux-4.4.62.orig/mm/truncate.c linux-4.4.62/mm/truncate.c
  28099. --- linux-4.4.62.orig/mm/truncate.c 2017-04-18 07:15:37.000000000 +0200
  28100. +++ linux-4.4.62/mm/truncate.c 2017-04-18 17:38:08.234650796 +0200
  28101. @@ -56,8 +56,11 @@
  28102. * protected by mapping->tree_lock.
  28103. */
  28104. if (!workingset_node_shadows(node) &&
  28105. - !list_empty(&node->private_list))
  28106. - list_lru_del(&workingset_shadow_nodes, &node->private_list);
  28107. + !list_empty(&node->private_list)) {
  28108. + local_lock(workingset_shadow_lock);
  28109. + list_lru_del(&__workingset_shadow_nodes, &node->private_list);
  28110. + local_unlock(workingset_shadow_lock);
  28111. + }
  28112. __radix_tree_delete_node(&mapping->page_tree, node);
  28113. unlock:
  28114. spin_unlock_irq(&mapping->tree_lock);
  28115. diff -Nur linux-4.4.62.orig/mm/vmalloc.c linux-4.4.62/mm/vmalloc.c
  28116. --- linux-4.4.62.orig/mm/vmalloc.c 2017-04-18 07:15:37.000000000 +0200
  28117. +++ linux-4.4.62/mm/vmalloc.c 2017-04-18 17:38:08.234650796 +0200
  28118. @@ -821,7 +821,7 @@
  28119. struct vmap_block *vb;
  28120. struct vmap_area *va;
  28121. unsigned long vb_idx;
  28122. - int node, err;
  28123. + int node, err, cpu;
  28124. void *vaddr;
  28125. node = numa_node_id();
  28126. @@ -864,11 +864,12 @@
  28127. BUG_ON(err);
  28128. radix_tree_preload_end();
  28129. - vbq = &get_cpu_var(vmap_block_queue);
  28130. + cpu = get_cpu_light();
  28131. + vbq = this_cpu_ptr(&vmap_block_queue);
  28132. spin_lock(&vbq->lock);
  28133. list_add_tail_rcu(&vb->free_list, &vbq->free);
  28134. spin_unlock(&vbq->lock);
  28135. - put_cpu_var(vmap_block_queue);
  28136. + put_cpu_light();
  28137. return vaddr;
  28138. }
  28139. @@ -937,6 +938,7 @@
  28140. struct vmap_block *vb;
  28141. void *vaddr = NULL;
  28142. unsigned int order;
  28143. + int cpu;
  28144. BUG_ON(offset_in_page(size));
  28145. BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
  28146. @@ -951,7 +953,8 @@
  28147. order = get_order(size);
  28148. rcu_read_lock();
  28149. - vbq = &get_cpu_var(vmap_block_queue);
  28150. + cpu = get_cpu_light();
  28151. + vbq = this_cpu_ptr(&vmap_block_queue);
  28152. list_for_each_entry_rcu(vb, &vbq->free, free_list) {
  28153. unsigned long pages_off;
  28154. @@ -974,7 +977,7 @@
  28155. break;
  28156. }
  28157. - put_cpu_var(vmap_block_queue);
  28158. + put_cpu_light();
  28159. rcu_read_unlock();
  28160. /* Allocate new block if nothing was found */
  28161. diff -Nur linux-4.4.62.orig/mm/vmstat.c linux-4.4.62/mm/vmstat.c
  28162. --- linux-4.4.62.orig/mm/vmstat.c 2017-04-18 07:15:37.000000000 +0200
  28163. +++ linux-4.4.62/mm/vmstat.c 2017-04-18 17:38:08.238650951 +0200
  28164. @@ -226,6 +226,7 @@
  28165. long x;
  28166. long t;
  28167. + preempt_disable_rt();
  28168. x = delta + __this_cpu_read(*p);
  28169. t = __this_cpu_read(pcp->stat_threshold);
  28170. @@ -235,6 +236,7 @@
  28171. x = 0;
  28172. }
  28173. __this_cpu_write(*p, x);
  28174. + preempt_enable_rt();
  28175. }
  28176. EXPORT_SYMBOL(__mod_zone_page_state);
  28177. @@ -267,6 +269,7 @@
  28178. s8 __percpu *p = pcp->vm_stat_diff + item;
  28179. s8 v, t;
  28180. + preempt_disable_rt();
  28181. v = __this_cpu_inc_return(*p);
  28182. t = __this_cpu_read(pcp->stat_threshold);
  28183. if (unlikely(v > t)) {
  28184. @@ -275,6 +278,7 @@
  28185. zone_page_state_add(v + overstep, zone, item);
  28186. __this_cpu_write(*p, -overstep);
  28187. }
  28188. + preempt_enable_rt();
  28189. }
  28190. void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
  28191. @@ -289,6 +293,7 @@
  28192. s8 __percpu *p = pcp->vm_stat_diff + item;
  28193. s8 v, t;
  28194. + preempt_disable_rt();
  28195. v = __this_cpu_dec_return(*p);
  28196. t = __this_cpu_read(pcp->stat_threshold);
  28197. if (unlikely(v < - t)) {
  28198. @@ -297,6 +302,7 @@
  28199. zone_page_state_add(v - overstep, zone, item);
  28200. __this_cpu_write(*p, overstep);
  28201. }
  28202. + preempt_enable_rt();
  28203. }
  28204. void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
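The vmstat counters are updated with __this_cpu_*() read-modify-write sequences that mainline protects only through the caller's preempt-disabled context. On RT many of those callers hold sleeping locks instead, so preempt_disable_rt()/preempt_enable_rt() (presumably no-ops on non-RT builds and a real preempt_disable() on RT) bracket the per-CPU delta handling. The general shape, with a hypothetical counter:

static DEFINE_PER_CPU(long, my_stat);

static void my_stat_add(long delta)
{
	preempt_disable_rt();	/* keep the RMW on one CPU when callers are preemptible */
	__this_cpu_add(my_stat, delta);
	preempt_enable_rt();
}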
  28205. diff -Nur linux-4.4.62.orig/mm/workingset.c linux-4.4.62/mm/workingset.c
  28206. --- linux-4.4.62.orig/mm/workingset.c 2017-04-18 07:15:37.000000000 +0200
  28207. +++ linux-4.4.62/mm/workingset.c 2017-04-18 17:38:08.238650951 +0200
  28208. @@ -264,7 +264,8 @@
  28209. * point where they would still be useful.
  28210. */
  28211. -struct list_lru workingset_shadow_nodes;
  28212. +struct list_lru __workingset_shadow_nodes;
  28213. +DEFINE_LOCAL_IRQ_LOCK(workingset_shadow_lock);
  28214. static unsigned long count_shadow_nodes(struct shrinker *shrinker,
  28215. struct shrink_control *sc)
  28216. @@ -274,9 +275,9 @@
  28217. unsigned long pages;
  28218. /* list_lru lock nests inside IRQ-safe mapping->tree_lock */
  28219. - local_irq_disable();
  28220. - shadow_nodes = list_lru_shrink_count(&workingset_shadow_nodes, sc);
  28221. - local_irq_enable();
  28222. + local_lock_irq(workingset_shadow_lock);
  28223. + shadow_nodes = list_lru_shrink_count(&__workingset_shadow_nodes, sc);
  28224. + local_unlock_irq(workingset_shadow_lock);
  28225. pages = node_present_pages(sc->nid);
  28226. /*
  28227. @@ -361,9 +362,9 @@
  28228. spin_unlock(&mapping->tree_lock);
  28229. ret = LRU_REMOVED_RETRY;
  28230. out:
  28231. - local_irq_enable();
  28232. + local_unlock_irq(workingset_shadow_lock);
  28233. cond_resched();
  28234. - local_irq_disable();
  28235. + local_lock_irq(workingset_shadow_lock);
  28236. spin_lock(lru_lock);
  28237. return ret;
  28238. }
  28239. @@ -374,10 +375,10 @@
  28240. unsigned long ret;
  28241. /* list_lru lock nests inside IRQ-safe mapping->tree_lock */
  28242. - local_irq_disable();
  28243. - ret = list_lru_shrink_walk(&workingset_shadow_nodes, sc,
  28244. + local_lock_irq(workingset_shadow_lock);
  28245. + ret = list_lru_shrink_walk(&__workingset_shadow_nodes, sc,
  28246. shadow_lru_isolate, NULL);
  28247. - local_irq_enable();
  28248. + local_unlock_irq(workingset_shadow_lock);
  28249. return ret;
  28250. }
  28251. @@ -398,7 +399,7 @@
  28252. {
  28253. int ret;
  28254. - ret = list_lru_init_key(&workingset_shadow_nodes, &shadow_nodes_key);
  28255. + ret = list_lru_init_key(&__workingset_shadow_nodes, &shadow_nodes_key);
  28256. if (ret)
  28257. goto err;
  28258. ret = register_shrinker(&workingset_shadow_shrinker);
  28259. @@ -406,7 +407,7 @@
  28260. goto err_list_lru;
  28261. return 0;
  28262. err_list_lru:
  28263. - list_lru_destroy(&workingset_shadow_nodes);
  28264. + list_lru_destroy(&__workingset_shadow_nodes);
  28265. err:
  28266. return ret;
  28267. }
  28268. diff -Nur linux-4.4.62.orig/mm/zsmalloc.c linux-4.4.62/mm/zsmalloc.c
  28269. --- linux-4.4.62.orig/mm/zsmalloc.c 2017-04-18 07:15:37.000000000 +0200
  28270. +++ linux-4.4.62/mm/zsmalloc.c 2017-04-18 17:38:08.238650951 +0200
  28271. @@ -64,6 +64,7 @@
  28272. #include <linux/debugfs.h>
  28273. #include <linux/zsmalloc.h>
  28274. #include <linux/zpool.h>
  28275. +#include <linux/locallock.h>
  28276. /*
  28277. * This must be power of 2 and greater than of equal to sizeof(link_free).
  28278. @@ -403,6 +404,7 @@
  28279. /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
  28280. static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
  28281. +static DEFINE_LOCAL_IRQ_LOCK(zs_map_area_lock);
  28282. static int is_first_page(struct page *page)
  28283. {
  28284. @@ -1289,7 +1291,7 @@
  28285. class = pool->size_class[class_idx];
  28286. off = obj_idx_to_offset(page, obj_idx, class->size);
  28287. - area = &get_cpu_var(zs_map_area);
  28288. + area = &get_locked_var(zs_map_area_lock, zs_map_area);
  28289. area->vm_mm = mm;
  28290. if (off + class->size <= PAGE_SIZE) {
  28291. /* this object is contained entirely within a page */
  28292. @@ -1342,7 +1344,7 @@
  28293. __zs_unmap_object(area, pages, off, class->size);
  28294. }
  28295. - put_cpu_var(zs_map_area);
  28296. + put_locked_var(zs_map_area_lock, zs_map_area);
  28297. unpin_tag(handle);
  28298. }
  28299. EXPORT_SYMBOL_GPL(zs_unmap_object);
  28300. diff -Nur linux-4.4.62.orig/net/core/dev.c linux-4.4.62/net/core/dev.c
  28301. --- linux-4.4.62.orig/net/core/dev.c 2017-04-18 07:15:37.000000000 +0200
  28302. +++ linux-4.4.62/net/core/dev.c 2017-04-18 17:38:08.238650951 +0200
  28303. @@ -186,6 +186,7 @@
  28304. static DEFINE_HASHTABLE(napi_hash, 8);
  28305. static seqcount_t devnet_rename_seq;
  28306. +static DEFINE_MUTEX(devnet_rename_mutex);
  28307. static inline void dev_base_seq_inc(struct net *net)
  28308. {
  28309. @@ -207,14 +208,14 @@
  28310. static inline void rps_lock(struct softnet_data *sd)
  28311. {
  28312. #ifdef CONFIG_RPS
  28313. - spin_lock(&sd->input_pkt_queue.lock);
  28314. + raw_spin_lock(&sd->input_pkt_queue.raw_lock);
  28315. #endif
  28316. }
  28317. static inline void rps_unlock(struct softnet_data *sd)
  28318. {
  28319. #ifdef CONFIG_RPS
  28320. - spin_unlock(&sd->input_pkt_queue.lock);
  28321. + raw_spin_unlock(&sd->input_pkt_queue.raw_lock);
  28322. #endif
  28323. }
  28324. @@ -884,7 +885,8 @@
  28325. strcpy(name, dev->name);
  28326. rcu_read_unlock();
  28327. if (read_seqcount_retry(&devnet_rename_seq, seq)) {
  28328. - cond_resched();
  28329. + mutex_lock(&devnet_rename_mutex);
  28330. + mutex_unlock(&devnet_rename_mutex);
  28331. goto retry;
  28332. }
  28333. @@ -1153,20 +1155,17 @@
  28334. if (dev->flags & IFF_UP)
  28335. return -EBUSY;
  28336. - write_seqcount_begin(&devnet_rename_seq);
  28337. + mutex_lock(&devnet_rename_mutex);
  28338. + __raw_write_seqcount_begin(&devnet_rename_seq);
  28339. - if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
  28340. - write_seqcount_end(&devnet_rename_seq);
  28341. - return 0;
  28342. - }
  28343. + if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
  28344. + goto outunlock;
  28345. memcpy(oldname, dev->name, IFNAMSIZ);
  28346. err = dev_get_valid_name(net, dev, newname);
  28347. - if (err < 0) {
  28348. - write_seqcount_end(&devnet_rename_seq);
  28349. - return err;
  28350. - }
  28351. + if (err < 0)
  28352. + goto outunlock;
  28353. if (oldname[0] && !strchr(oldname, '%'))
  28354. netdev_info(dev, "renamed from %s\n", oldname);
  28355. @@ -1179,11 +1178,12 @@
  28356. if (ret) {
  28357. memcpy(dev->name, oldname, IFNAMSIZ);
  28358. dev->name_assign_type = old_assign_type;
  28359. - write_seqcount_end(&devnet_rename_seq);
  28360. - return ret;
  28361. + err = ret;
  28362. + goto outunlock;
  28363. }
  28364. - write_seqcount_end(&devnet_rename_seq);
  28365. + __raw_write_seqcount_end(&devnet_rename_seq);
  28366. + mutex_unlock(&devnet_rename_mutex);
  28367. netdev_adjacent_rename_links(dev, oldname);
  28368. @@ -1204,7 +1204,8 @@
  28369. /* err >= 0 after dev_alloc_name() or stores the first errno */
  28370. if (err >= 0) {
  28371. err = ret;
  28372. - write_seqcount_begin(&devnet_rename_seq);
  28373. + mutex_lock(&devnet_rename_mutex);
  28374. + __raw_write_seqcount_begin(&devnet_rename_seq);
  28375. memcpy(dev->name, oldname, IFNAMSIZ);
  28376. memcpy(oldname, newname, IFNAMSIZ);
  28377. dev->name_assign_type = old_assign_type;
  28378. @@ -1217,6 +1218,11 @@
  28379. }
  28380. return err;
  28381. +
  28382. +outunlock:
  28383. + __raw_write_seqcount_end(&devnet_rename_seq);
  28384. + mutex_unlock(&devnet_rename_mutex);
  28385. + return err;
  28386. }
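The rewrite above keeps devnet_rename_seq for lockless readers but serializes writers with devnet_rename_mutex; the __raw_write_seqcount_begin()/__raw_write_seqcount_end() variants used here only bump the sequence counter and rely on the mutex for writer exclusion. A reader that observes a retry now takes and drops the mutex instead of calling cond_resched(), so it sleeps until the rename is finished rather than busy-retrying against a possibly preempted writer. Roughly, as a generic sketch (not the exact dev.c code):

static seqcount_t demo_seq = SEQCNT_ZERO(demo_seq);
static DEFINE_MUTEX(demo_mutex);
static int demo_value;

static void demo_write(int v)
{
        mutex_lock(&demo_mutex);
        __raw_write_seqcount_begin(&demo_seq);  /* writers already serialized */
        demo_value = v;
        __raw_write_seqcount_end(&demo_seq);
        mutex_unlock(&demo_mutex);
}

static int demo_read(void)
{
        unsigned int seq;
        int v;

retry:
        seq = read_seqcount_begin(&demo_seq);
        v = demo_value;
        if (read_seqcount_retry(&demo_seq, seq)) {
                /* wait for the writer instead of spinning */
                mutex_lock(&demo_mutex);
                mutex_unlock(&demo_mutex);
                goto retry;
        }
        return v;
}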
  28387. /**
  28388. @@ -2268,6 +2274,7 @@
  28389. sd->output_queue_tailp = &q->next_sched;
  28390. raise_softirq_irqoff(NET_TX_SOFTIRQ);
  28391. local_irq_restore(flags);
  28392. + preempt_check_resched_rt();
  28393. }
  28394. void __netif_schedule(struct Qdisc *q)
  28395. @@ -2349,6 +2356,7 @@
  28396. __this_cpu_write(softnet_data.completion_queue, skb);
  28397. raise_softirq_irqoff(NET_TX_SOFTIRQ);
  28398. local_irq_restore(flags);
  28399. + preempt_check_resched_rt();
  28400. }
  28401. EXPORT_SYMBOL(__dev_kfree_skb_irq);
  28402. @@ -2906,7 +2914,11 @@
  28403. * This permits __QDISC___STATE_RUNNING owner to get the lock more
  28404. * often and dequeue packets faster.
  28405. */
  28406. +#ifdef CONFIG_PREEMPT_RT_FULL
  28407. + contended = true;
  28408. +#else
  28409. contended = qdisc_is_running(q);
  28410. +#endif
  28411. if (unlikely(contended))
  28412. spin_lock(&q->busylock);
  28413. @@ -2966,9 +2978,44 @@
  28414. #define skb_update_prio(skb)
  28415. #endif
  28416. +#ifdef CONFIG_PREEMPT_RT_FULL
  28417. +
  28418. +static inline int xmit_rec_read(void)
  28419. +{
  28420. + return current->xmit_recursion;
  28421. +}
  28422. +
  28423. +static inline void xmit_rec_inc(void)
  28424. +{
  28425. + current->xmit_recursion++;
  28426. +}
  28427. +
  28428. +static inline void xmit_rec_dec(void)
  28429. +{
  28430. + current->xmit_recursion--;
  28431. +}
  28432. +
  28433. +#else
  28434. +
  28435. DEFINE_PER_CPU(int, xmit_recursion);
  28436. EXPORT_SYMBOL(xmit_recursion);
  28437. +static inline int xmit_rec_read(void)
  28438. +{
  28439. + return __this_cpu_read(xmit_recursion);
  28440. +}
  28441. +
  28442. +static inline void xmit_rec_inc(void)
  28443. +{
  28444. + __this_cpu_inc(xmit_recursion);
  28445. +}
  28446. +
  28447. +static inline void xmit_rec_dec(void)
  28448. +{
  28449. + __this_cpu_dec(xmit_recursion);
  28450. +}
  28451. +#endif
  28452. +
  28453. #define RECURSION_LIMIT 10
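On PREEMPT_RT_FULL the transmit path is preemptible and the task may migrate between xmit_rec_inc() and xmit_rec_dec(), so a per-CPU counter could be incremented on one CPU and decremented on another; the recursion count therefore lives in the task itself. This presumably pairs with an xmit_recursion field added to struct task_struct elsewhere in this patch, along these lines (sketch only, not a quote of that hunk):

/* include/linux/sched.h, assumed shape: */
struct task_struct {
        /* ... */
#ifdef CONFIG_PREEMPT_RT_FULL
        int                     xmit_recursion;
#endif
        /* ... */
};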
  28454. /**
  28455. @@ -3161,7 +3208,7 @@
  28456. if (txq->xmit_lock_owner != cpu) {
  28457. - if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
  28458. + if (xmit_rec_read() > RECURSION_LIMIT)
  28459. goto recursion_alert;
  28460. skb = validate_xmit_skb(skb, dev);
  28461. @@ -3171,9 +3218,9 @@
  28462. HARD_TX_LOCK(dev, txq, cpu);
  28463. if (!netif_xmit_stopped(txq)) {
  28464. - __this_cpu_inc(xmit_recursion);
  28465. + xmit_rec_inc();
  28466. skb = dev_hard_start_xmit(skb, dev, txq, &rc);
  28467. - __this_cpu_dec(xmit_recursion);
  28468. + xmit_rec_dec();
  28469. if (dev_xmit_complete(rc)) {
  28470. HARD_TX_UNLOCK(dev, txq);
  28471. goto out;
  28472. @@ -3547,6 +3594,7 @@
  28473. rps_unlock(sd);
  28474. local_irq_restore(flags);
  28475. + preempt_check_resched_rt();
  28476. atomic_long_inc(&skb->dev->rx_dropped);
  28477. kfree_skb(skb);
  28478. @@ -3565,7 +3613,7 @@
  28479. struct rps_dev_flow voidflow, *rflow = &voidflow;
  28480. int cpu;
  28481. - preempt_disable();
  28482. + migrate_disable();
  28483. rcu_read_lock();
  28484. cpu = get_rps_cpu(skb->dev, skb, &rflow);
  28485. @@ -3575,13 +3623,13 @@
  28486. ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
  28487. rcu_read_unlock();
  28488. - preempt_enable();
  28489. + migrate_enable();
  28490. } else
  28491. #endif
  28492. {
  28493. unsigned int qtail;
  28494. - ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
  28495. - put_cpu();
  28496. + ret = enqueue_to_backlog(skb, get_cpu_light(), &qtail);
  28497. + put_cpu_light();
  28498. }
  28499. return ret;
  28500. }
  28501. @@ -3615,16 +3663,44 @@
  28502. trace_netif_rx_ni_entry(skb);
  28503. - preempt_disable();
  28504. + local_bh_disable();
  28505. err = netif_rx_internal(skb);
  28506. - if (local_softirq_pending())
  28507. - do_softirq();
  28508. - preempt_enable();
  28509. + local_bh_enable();
  28510. return err;
  28511. }
  28512. EXPORT_SYMBOL(netif_rx_ni);
  28513. +#ifdef CONFIG_PREEMPT_RT_FULL
  28514. +/*
  28515. + * RT runs ksoftirqd as a real time thread and the root_lock is a
  28516. + * "sleeping spinlock". If the trylock fails then we can go into an
  28517. + * infinite loop when ksoftirqd preempted the task which actually
  28518. + * holds the lock, because we requeue q and raise NET_TX softirq
  28519. + * causing ksoftirqd to loop forever.
  28520. + *
  28521. + * It's safe to use spin_lock on RT here as softirqs run in thread
  28522. + * context and cannot deadlock against the thread which is holding
  28523. + * root_lock.
  28524. + *
  28525. + * On !RT the trylock might fail, but there we bail out from the
  28526. + * softirq loop after 10 attempts which we can't do on RT. And the
  28527. + * task holding root_lock cannot be preempted, so the only downside of
  28528. + * that trylock is that we need 10 loops to decide that we should have
  28529. + * given up in the first one :)
  28530. + */
  28531. +static inline int take_root_lock(spinlock_t *lock)
  28532. +{
  28533. + spin_lock(lock);
  28534. + return 1;
  28535. +}
  28536. +#else
  28537. +static inline int take_root_lock(spinlock_t *lock)
  28538. +{
  28539. + return spin_trylock(lock);
  28540. +}
  28541. +#endif
  28542. +
  28543. static void net_tx_action(struct softirq_action *h)
  28544. {
  28545. struct softnet_data *sd = this_cpu_ptr(&softnet_data);
  28546. @@ -3666,7 +3742,7 @@
  28547. head = head->next_sched;
  28548. root_lock = qdisc_lock(q);
  28549. - if (spin_trylock(root_lock)) {
  28550. + if (take_root_lock(root_lock)) {
  28551. smp_mb__before_atomic();
  28552. clear_bit(__QDISC_STATE_SCHED,
  28553. &q->state);
  28554. @@ -4088,7 +4164,7 @@
  28555. skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
  28556. if (skb->dev == dev) {
  28557. __skb_unlink(skb, &sd->input_pkt_queue);
  28558. - kfree_skb(skb);
  28559. + __skb_queue_tail(&sd->tofree_queue, skb);
  28560. input_queue_head_incr(sd);
  28561. }
  28562. }
  28563. @@ -4097,10 +4173,13 @@
  28564. skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
  28565. if (skb->dev == dev) {
  28566. __skb_unlink(skb, &sd->process_queue);
  28567. - kfree_skb(skb);
  28568. + __skb_queue_tail(&sd->tofree_queue, skb);
  28569. input_queue_head_incr(sd);
  28570. }
  28571. }
  28572. +
  28573. + if (!skb_queue_empty(&sd->tofree_queue))
  28574. + raise_softirq_irqoff(NET_RX_SOFTIRQ);
  28575. }
  28576. static int napi_gro_complete(struct sk_buff *skb)
  28577. @@ -4557,6 +4636,7 @@
  28578. sd->rps_ipi_list = NULL;
  28579. local_irq_enable();
  28580. + preempt_check_resched_rt();
  28581. /* Send pending IPI's to kick RPS processing on remote cpus. */
  28582. while (remsd) {
  28583. @@ -4570,6 +4650,7 @@
  28584. } else
  28585. #endif
  28586. local_irq_enable();
  28587. + preempt_check_resched_rt();
  28588. }
  28589. static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
  28590. @@ -4651,9 +4732,11 @@
  28591. local_irq_save(flags);
  28592. ____napi_schedule(this_cpu_ptr(&softnet_data), n);
  28593. local_irq_restore(flags);
  28594. + preempt_check_resched_rt();
  28595. }
  28596. EXPORT_SYMBOL(__napi_schedule);
  28597. +#ifndef CONFIG_PREEMPT_RT_FULL
  28598. /**
  28599. * __napi_schedule_irqoff - schedule for receive
  28600. * @n: entry to schedule
  28601. @@ -4665,6 +4748,7 @@
  28602. ____napi_schedule(this_cpu_ptr(&softnet_data), n);
  28603. }
  28604. EXPORT_SYMBOL(__napi_schedule_irqoff);
  28605. +#endif
  28606. void __napi_complete(struct napi_struct *n)
  28607. {
  28608. @@ -4891,13 +4975,21 @@
  28609. struct softnet_data *sd = this_cpu_ptr(&softnet_data);
  28610. unsigned long time_limit = jiffies + 2;
  28611. int budget = netdev_budget;
  28612. + struct sk_buff_head tofree_q;
  28613. + struct sk_buff *skb;
  28614. LIST_HEAD(list);
  28615. LIST_HEAD(repoll);
  28616. + __skb_queue_head_init(&tofree_q);
  28617. +
  28618. local_irq_disable();
  28619. + skb_queue_splice_init(&sd->tofree_queue, &tofree_q);
  28620. list_splice_init(&sd->poll_list, &list);
  28621. local_irq_enable();
  28622. + while ((skb = __skb_dequeue(&tofree_q)))
  28623. + kfree_skb(skb);
  28624. +
  28625. for (;;) {
  28626. struct napi_struct *n;
  28627. @@ -4927,7 +5019,7 @@
  28628. list_splice_tail(&repoll, &list);
  28629. list_splice(&list, &sd->poll_list);
  28630. if (!list_empty(&sd->poll_list))
  28631. - __raise_softirq_irqoff(NET_RX_SOFTIRQ);
  28632. + __raise_softirq_irqoff_ksoft(NET_RX_SOFTIRQ);
  28633. net_rps_action_and_irq_enable(sd);
  28634. }
  28635. @@ -7266,7 +7358,7 @@
  28636. void synchronize_net(void)
  28637. {
  28638. might_sleep();
  28639. - if (rtnl_is_locked())
  28640. + if (rtnl_is_locked() && !IS_ENABLED(CONFIG_PREEMPT_RT_FULL))
  28641. synchronize_rcu_expedited();
  28642. else
  28643. synchronize_rcu();
  28644. @@ -7507,16 +7599,20 @@
  28645. raise_softirq_irqoff(NET_TX_SOFTIRQ);
  28646. local_irq_enable();
  28647. + preempt_check_resched_rt();
  28648. /* Process offline CPU's input_pkt_queue */
  28649. while ((skb = __skb_dequeue(&oldsd->process_queue))) {
  28650. netif_rx_ni(skb);
  28651. input_queue_head_incr(oldsd);
  28652. }
  28653. - while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) {
  28654. + while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
  28655. netif_rx_ni(skb);
  28656. input_queue_head_incr(oldsd);
  28657. }
  28658. + while ((skb = __skb_dequeue(&oldsd->tofree_queue))) {
  28659. + kfree_skb(skb);
  28660. + }
  28661. return NOTIFY_OK;
  28662. }
  28663. @@ -7818,8 +7914,9 @@
  28664. for_each_possible_cpu(i) {
  28665. struct softnet_data *sd = &per_cpu(softnet_data, i);
  28666. - skb_queue_head_init(&sd->input_pkt_queue);
  28667. - skb_queue_head_init(&sd->process_queue);
  28668. + skb_queue_head_init_raw(&sd->input_pkt_queue);
  28669. + skb_queue_head_init_raw(&sd->process_queue);
  28670. + skb_queue_head_init_raw(&sd->tofree_queue);
  28671. INIT_LIST_HEAD(&sd->poll_list);
  28672. sd->output_queue_tailp = &sd->output_queue;
  28673. #ifdef CONFIG_RPS
  28674. diff -Nur linux-4.4.62.orig/net/core/skbuff.c linux-4.4.62/net/core/skbuff.c
  28675. --- linux-4.4.62.orig/net/core/skbuff.c 2017-04-18 07:15:37.000000000 +0200
  28676. +++ linux-4.4.62/net/core/skbuff.c 2017-04-18 17:38:08.238650951 +0200
  28677. @@ -63,6 +63,7 @@
  28678. #include <linux/errqueue.h>
  28679. #include <linux/prefetch.h>
  28680. #include <linux/if_vlan.h>
  28681. +#include <linux/locallock.h>
  28682. #include <net/protocol.h>
  28683. #include <net/dst.h>
  28684. @@ -351,6 +352,8 @@
  28685. static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
  28686. static DEFINE_PER_CPU(struct page_frag_cache, napi_alloc_cache);
  28687. +static DEFINE_LOCAL_IRQ_LOCK(netdev_alloc_lock);
  28688. +static DEFINE_LOCAL_IRQ_LOCK(napi_alloc_cache_lock);
  28689. static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
  28690. {
  28691. @@ -358,10 +361,10 @@
  28692. unsigned long flags;
  28693. void *data;
  28694. - local_irq_save(flags);
  28695. + local_lock_irqsave(netdev_alloc_lock, flags);
  28696. nc = this_cpu_ptr(&netdev_alloc_cache);
  28697. data = __alloc_page_frag(nc, fragsz, gfp_mask);
  28698. - local_irq_restore(flags);
  28699. + local_unlock_irqrestore(netdev_alloc_lock, flags);
  28700. return data;
  28701. }
  28702. @@ -380,9 +383,13 @@
  28703. static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
  28704. {
  28705. - struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache);
  28706. + struct page_frag_cache *nc;
  28707. + void *data;
  28708. - return __alloc_page_frag(nc, fragsz, gfp_mask);
  28709. + nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
  28710. + data = __alloc_page_frag(nc, fragsz, gfp_mask);
  28711. + put_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
  28712. + return data;
  28713. }
  28714. void *napi_alloc_frag(unsigned int fragsz)
  28715. @@ -429,13 +436,13 @@
  28716. if (sk_memalloc_socks())
  28717. gfp_mask |= __GFP_MEMALLOC;
  28718. - local_irq_save(flags);
  28719. + local_lock_irqsave(netdev_alloc_lock, flags);
  28720. nc = this_cpu_ptr(&netdev_alloc_cache);
  28721. data = __alloc_page_frag(nc, len, gfp_mask);
  28722. pfmemalloc = nc->pfmemalloc;
  28723. - local_irq_restore(flags);
  28724. + local_unlock_irqrestore(netdev_alloc_lock, flags);
  28725. if (unlikely(!data))
  28726. return NULL;
  28727. @@ -476,9 +483,10 @@
  28728. struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
  28729. gfp_t gfp_mask)
  28730. {
  28731. - struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache);
  28732. + struct page_frag_cache *nc;
  28733. struct sk_buff *skb;
  28734. void *data;
  28735. + bool pfmemalloc;
  28736. len += NET_SKB_PAD + NET_IP_ALIGN;
  28737. @@ -496,7 +504,11 @@
  28738. if (sk_memalloc_socks())
  28739. gfp_mask |= __GFP_MEMALLOC;
  28740. + nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
  28741. data = __alloc_page_frag(nc, len, gfp_mask);
  28742. + pfmemalloc = nc->pfmemalloc;
  28743. + put_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
  28744. +
  28745. if (unlikely(!data))
  28746. return NULL;
  28747. @@ -507,7 +519,7 @@
  28748. }
  28749. /* use OR instead of assignment to avoid clearing of bits in mask */
  28750. - if (nc->pfmemalloc)
  28751. + if (pfmemalloc)
  28752. skb->pfmemalloc = 1;
  28753. skb->head_frag = 1;
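One detail worth noting in __napi_alloc_skb() above: the per-CPU napi_alloc_cache is only stable while napi_alloc_cache_lock is held, so nc->pfmemalloc is copied into a local variable before put_locked_var() and the skb flag is later set from that snapshot. As a general rule for this pattern, read everything still needed from the per-CPU structure before dropping the local lock; reusing the get_locked_var() sketch shown after the zsmalloc.c hunk above (demo names illustrative):

        c = &get_locked_var(demo_cache_lock, demo_cache);
        /* ... allocate from *c ... */
        pfmemalloc = c->pfmemalloc;     /* snapshot while still locked */
        put_locked_var(demo_cache_lock, demo_cache);
        /* from here on use the snapshot, never c->pfmemalloc */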
  28754. diff -Nur linux-4.4.62.orig/net/core/sock.c linux-4.4.62/net/core/sock.c
  28755. --- linux-4.4.62.orig/net/core/sock.c 2017-04-18 07:15:37.000000000 +0200
  28756. +++ linux-4.4.62/net/core/sock.c 2017-04-18 17:38:08.238650951 +0200
  28757. @@ -2447,12 +2447,11 @@
  28758. if (sk->sk_lock.owned)
  28759. __lock_sock(sk);
  28760. sk->sk_lock.owned = 1;
  28761. - spin_unlock(&sk->sk_lock.slock);
  28762. + spin_unlock_bh(&sk->sk_lock.slock);
  28763. /*
  28764. * The sk_lock has mutex_lock() semantics here:
  28765. */
  28766. mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
  28767. - local_bh_enable();
  28768. }
  28769. EXPORT_SYMBOL(lock_sock_nested);
  28770. diff -Nur linux-4.4.62.orig/net/ipv4/icmp.c linux-4.4.62/net/ipv4/icmp.c
  28771. --- linux-4.4.62.orig/net/ipv4/icmp.c 2017-04-18 07:15:37.000000000 +0200
  28772. +++ linux-4.4.62/net/ipv4/icmp.c 2017-04-18 17:38:08.238650951 +0200
  28773. @@ -69,6 +69,7 @@
  28774. #include <linux/jiffies.h>
  28775. #include <linux/kernel.h>
  28776. #include <linux/fcntl.h>
  28777. +#include <linux/sysrq.h>
  28778. #include <linux/socket.h>
  28779. #include <linux/in.h>
  28780. #include <linux/inet.h>
  28781. @@ -77,6 +78,7 @@
  28782. #include <linux/string.h>
  28783. #include <linux/netfilter_ipv4.h>
  28784. #include <linux/slab.h>
  28785. +#include <linux/locallock.h>
  28786. #include <net/snmp.h>
  28787. #include <net/ip.h>
  28788. #include <net/route.h>
  28789. @@ -204,6 +206,8 @@
  28790. *
  28791. * On SMP we have one ICMP socket per-cpu.
  28792. */
  28793. +static DEFINE_LOCAL_IRQ_LOCK(icmp_sk_lock);
  28794. +
  28795. static struct sock *icmp_sk(struct net *net)
  28796. {
  28797. return *this_cpu_ptr(net->ipv4.icmp_sk);
  28798. @@ -215,12 +219,14 @@
  28799. local_bh_disable();
  28800. + local_lock(icmp_sk_lock);
  28801. sk = icmp_sk(net);
  28802. if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
  28803. /* This can happen if the output path signals a
  28804. * dst_link_failure() for an outgoing ICMP packet.
  28805. */
  28806. + local_unlock(icmp_sk_lock);
  28807. local_bh_enable();
  28808. return NULL;
  28809. }
  28810. @@ -230,6 +236,7 @@
  28811. static inline void icmp_xmit_unlock(struct sock *sk)
  28812. {
  28813. spin_unlock_bh(&sk->sk_lock.slock);
  28814. + local_unlock(icmp_sk_lock);
  28815. }
  28816. int sysctl_icmp_msgs_per_sec __read_mostly = 1000;
  28817. @@ -358,6 +365,7 @@
  28818. struct sock *sk;
  28819. struct sk_buff *skb;
  28820. + local_lock(icmp_sk_lock);
  28821. sk = icmp_sk(dev_net((*rt)->dst.dev));
  28822. if (ip_append_data(sk, fl4, icmp_glue_bits, icmp_param,
  28823. icmp_param->data_len+icmp_param->head_len,
  28824. @@ -380,6 +388,7 @@
  28825. skb->ip_summed = CHECKSUM_NONE;
  28826. ip_push_pending_frames(sk, fl4);
  28827. }
  28828. + local_unlock(icmp_sk_lock);
  28829. }
  28830. /*
  28831. @@ -891,6 +900,30 @@
  28832. }
  28833. /*
  28834. + * 32bit and 64bit have different timestamp length, so we check for
  28835. + * the cookie at offset 20 and verify it is repeated at offset 50
  28836. + */
  28837. +#define CO_POS0 20
  28838. +#define CO_POS1 50
  28839. +#define CO_SIZE sizeof(int)
  28840. +#define ICMP_SYSRQ_SIZE 57
  28841. +
  28842. +/*
  28843. + * We got a ICMP_SYSRQ_SIZE sized ping request. Check for the cookie
  28844. + * pattern and if it matches send the next byte as a trigger to sysrq.
  28845. + */
  28846. +static void icmp_check_sysrq(struct net *net, struct sk_buff *skb)
  28847. +{
  28848. + int cookie = htonl(net->ipv4.sysctl_icmp_echo_sysrq);
  28849. + char *p = skb->data;
  28850. +
  28851. + if (!memcmp(&cookie, p + CO_POS0, CO_SIZE) &&
  28852. + !memcmp(&cookie, p + CO_POS1, CO_SIZE) &&
  28853. + p[CO_POS0 + CO_SIZE] == p[CO_POS1 + CO_SIZE])
  28854. + handle_sysrq(p[CO_POS0 + CO_SIZE]);
  28855. +}
  28856. +
  28857. +/*
  28858. * Handle ICMP_ECHO ("ping") requests.
  28859. *
  28860. * RFC 1122: 3.2.2.6 MUST have an echo server that answers ICMP echo
  28861. @@ -917,6 +950,11 @@
  28862. icmp_param.data_len = skb->len;
  28863. icmp_param.head_len = sizeof(struct icmphdr);
  28864. icmp_reply(&icmp_param, skb);
  28865. +
  28866. + if (skb->len == ICMP_SYSRQ_SIZE &&
  28867. + net->ipv4.sysctl_icmp_echo_sysrq) {
  28868. + icmp_check_sysrq(net, skb);
  28869. + }
  28870. }
  28871. /* should there be an ICMP stat for ignored echos? */
  28872. return true;
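icmp_check_sysrq() implements a sysrq-over-ICMP trigger guarded by the icmp_echo_sysrq sysctl added in the sysctl_net_ipv4.c hunk that follows: the request must carry a 57-byte echo payload with the cookie (in network byte order) at offsets 20 and 50 and the sysrq command character immediately after each copy; two offsets are checked because ping places its timestamp at the start of the payload and 32-bit and 64-bit ping binaries use different timestamp sizes. A hypothetical userspace helper that builds such a payload (illustrative only; sending it, e.g. over a raw ICMP socket or via ping's -p pattern, is not shown):

#include <arpa/inet.h>
#include <string.h>

/*
 * Fill a 57-byte ICMP echo payload that matches icmp_check_sysrq():
 * 'cookie_host' is the value written to /proc/sys/net/ipv4/icmp_echo_sysrq
 * on the target, 'cmd' the sysrq command key (e.g. 'h').
 */
static void fill_sysrq_ping_payload(unsigned char buf[57],
                                    unsigned int cookie_host, char cmd)
{
        unsigned int cookie = htonl(cookie_host);

        memset(buf, 0, 57);
        memcpy(buf + 20, &cookie, sizeof(cookie));      /* CO_POS0 */
        buf[20 + sizeof(cookie)] = cmd;
        memcpy(buf + 50, &cookie, sizeof(cookie));      /* CO_POS1 */
        buf[50 + sizeof(cookie)] = cmd;
}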
  28873. diff -Nur linux-4.4.62.orig/net/ipv4/sysctl_net_ipv4.c linux-4.4.62/net/ipv4/sysctl_net_ipv4.c
  28874. --- linux-4.4.62.orig/net/ipv4/sysctl_net_ipv4.c 2017-04-18 07:15:37.000000000 +0200
  28875. +++ linux-4.4.62/net/ipv4/sysctl_net_ipv4.c 2017-04-18 17:38:08.238650951 +0200
  28876. @@ -818,6 +818,13 @@
  28877. .proc_handler = proc_dointvec
  28878. },
  28879. {
  28880. + .procname = "icmp_echo_sysrq",
  28881. + .data = &init_net.ipv4.sysctl_icmp_echo_sysrq,
  28882. + .maxlen = sizeof(int),
  28883. + .mode = 0644,
  28884. + .proc_handler = proc_dointvec
  28885. + },
  28886. + {
  28887. .procname = "icmp_ignore_bogus_error_responses",
  28888. .data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses,
  28889. .maxlen = sizeof(int),
  28890. diff -Nur linux-4.4.62.orig/net/ipv4/tcp_ipv4.c linux-4.4.62/net/ipv4/tcp_ipv4.c
  28891. --- linux-4.4.62.orig/net/ipv4/tcp_ipv4.c 2017-04-18 07:15:37.000000000 +0200
  28892. +++ linux-4.4.62/net/ipv4/tcp_ipv4.c 2017-04-18 17:38:08.238650951 +0200
  28893. @@ -62,6 +62,7 @@
  28894. #include <linux/init.h>
  28895. #include <linux/times.h>
  28896. #include <linux/slab.h>
  28897. +#include <linux/locallock.h>
  28898. #include <net/net_namespace.h>
  28899. #include <net/icmp.h>
  28900. @@ -570,6 +571,7 @@
  28901. }
  28902. EXPORT_SYMBOL(tcp_v4_send_check);
  28903. +static DEFINE_LOCAL_IRQ_LOCK(tcp_sk_lock);
  28904. /*
  28905. * This routine will send an RST to the other tcp.
  28906. *
  28907. @@ -691,10 +693,13 @@
  28908. arg.bound_dev_if = sk->sk_bound_dev_if;
  28909. arg.tos = ip_hdr(skb)->tos;
  28910. +
  28911. + local_lock(tcp_sk_lock);
  28912. ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
  28913. skb, &TCP_SKB_CB(skb)->header.h4.opt,
  28914. ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
  28915. &arg, arg.iov[0].iov_len);
  28916. + local_unlock(tcp_sk_lock);
  28917. TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
  28918. TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
  28919. @@ -776,10 +781,12 @@
  28920. if (oif)
  28921. arg.bound_dev_if = oif;
  28922. arg.tos = tos;
  28923. + local_lock(tcp_sk_lock);
  28924. ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
  28925. skb, &TCP_SKB_CB(skb)->header.h4.opt,
  28926. ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
  28927. &arg, arg.iov[0].iov_len);
  28928. + local_unlock(tcp_sk_lock);
  28929. TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
  28930. }
  28931. diff -Nur linux-4.4.62.orig/net/mac80211/rx.c linux-4.4.62/net/mac80211/rx.c
  28932. --- linux-4.4.62.orig/net/mac80211/rx.c 2017-04-18 07:15:37.000000000 +0200
  28933. +++ linux-4.4.62/net/mac80211/rx.c 2017-04-18 17:38:08.242651106 +0200
  28934. @@ -3580,7 +3580,7 @@
  28935. struct ieee80211_supported_band *sband;
  28936. struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
  28937. - WARN_ON_ONCE(softirq_count() == 0);
  28938. + WARN_ON_ONCE_NONRT(softirq_count() == 0);
  28939. if (WARN_ON(status->band >= IEEE80211_NUM_BANDS))
  28940. goto drop;
  28941. diff -Nur linux-4.4.62.orig/net/netfilter/core.c linux-4.4.62/net/netfilter/core.c
  28942. --- linux-4.4.62.orig/net/netfilter/core.c 2017-04-18 07:15:37.000000000 +0200
  28943. +++ linux-4.4.62/net/netfilter/core.c 2017-04-18 17:38:08.242651106 +0200
  28944. @@ -22,11 +22,17 @@
  28945. #include <linux/proc_fs.h>
  28946. #include <linux/mutex.h>
  28947. #include <linux/slab.h>
  28948. +#include <linux/locallock.h>
  28949. #include <net/net_namespace.h>
  28950. #include <net/sock.h>
  28951. #include "nf_internals.h"
  28952. +#ifdef CONFIG_PREEMPT_RT_BASE
  28953. +DEFINE_LOCAL_IRQ_LOCK(xt_write_lock);
  28954. +EXPORT_PER_CPU_SYMBOL(xt_write_lock);
  28955. +#endif
  28956. +
  28957. static DEFINE_MUTEX(afinfo_mutex);
  28958. const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly;
  28959. diff -Nur linux-4.4.62.orig/net/packet/af_packet.c linux-4.4.62/net/packet/af_packet.c
  28960. --- linux-4.4.62.orig/net/packet/af_packet.c 2017-04-18 07:15:37.000000000 +0200
  28961. +++ linux-4.4.62/net/packet/af_packet.c 2017-04-18 17:38:08.242651106 +0200
  28962. @@ -63,6 +63,7 @@
  28963. #include <linux/if_packet.h>
  28964. #include <linux/wireless.h>
  28965. #include <linux/kernel.h>
  28966. +#include <linux/delay.h>
  28967. #include <linux/kmod.h>
  28968. #include <linux/slab.h>
  28969. #include <linux/vmalloc.h>
  28970. @@ -694,7 +695,7 @@
  28971. if (BLOCK_NUM_PKTS(pbd)) {
  28972. while (atomic_read(&pkc->blk_fill_in_prog)) {
  28973. /* Waiting for skb_copy_bits to finish... */
  28974. - cpu_relax();
  28975. + cpu_chill();
  28976. }
  28977. }
  28978. @@ -956,7 +957,7 @@
  28979. if (!(status & TP_STATUS_BLK_TMO)) {
  28980. while (atomic_read(&pkc->blk_fill_in_prog)) {
  28981. /* Waiting for skb_copy_bits to finish... */
  28982. - cpu_relax();
  28983. + cpu_chill();
  28984. }
  28985. }
  28986. prb_close_block(pkc, pbd, po, status);
  28987. diff -Nur linux-4.4.62.orig/net/rds/ib_rdma.c linux-4.4.62/net/rds/ib_rdma.c
  28988. --- linux-4.4.62.orig/net/rds/ib_rdma.c 2017-04-18 07:15:37.000000000 +0200
  28989. +++ linux-4.4.62/net/rds/ib_rdma.c 2017-04-18 17:38:08.242651106 +0200
  28990. @@ -34,6 +34,7 @@
  28991. #include <linux/slab.h>
  28992. #include <linux/rculist.h>
  28993. #include <linux/llist.h>
  28994. +#include <linux/delay.h>
  28995. #include "rds.h"
  28996. #include "ib.h"
  28997. @@ -313,7 +314,7 @@
  28998. for_each_online_cpu(cpu) {
  28999. flag = &per_cpu(clean_list_grace, cpu);
  29000. while (test_bit(CLEAN_LIST_BUSY_BIT, flag))
  29001. - cpu_relax();
  29002. + cpu_chill();
  29003. }
  29004. }
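Both busy-wait loops above (af_packet and rds) spin until another context clears a flag; on RT that other context can be preempted by the spinning, possibly higher-priority, task, so cpu_relax() can spin forever. cpu_chill() is the RT replacement that sleeps briefly instead of spinning. Its assumed interface, added by this patch elsewhere (sketch only, roughly as in <linux/delay.h>):

#ifdef CONFIG_PREEMPT_RT_FULL
extern void cpu_chill(void);    /* short sleep, lets the flag owner run */
#else
# define cpu_chill()    cpu_relax()
#endif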
  29005. diff -Nur linux-4.4.62.orig/net/sched/sch_generic.c linux-4.4.62/net/sched/sch_generic.c
  29006. --- linux-4.4.62.orig/net/sched/sch_generic.c 2017-04-18 07:15:37.000000000 +0200
  29007. +++ linux-4.4.62/net/sched/sch_generic.c 2017-04-18 17:38:08.242651106 +0200
  29008. @@ -893,7 +893,7 @@
  29009. /* Wait for outstanding qdisc_run calls. */
  29010. list_for_each_entry(dev, head, close_list)
  29011. while (some_qdisc_is_busy(dev))
  29012. - yield();
  29013. + msleep(1);
  29014. }
  29015. void dev_deactivate(struct net_device *dev)
  29016. diff -Nur linux-4.4.62.orig/net/sunrpc/svc_xprt.c linux-4.4.62/net/sunrpc/svc_xprt.c
  29017. --- linux-4.4.62.orig/net/sunrpc/svc_xprt.c 2017-04-18 07:15:37.000000000 +0200
  29018. +++ linux-4.4.62/net/sunrpc/svc_xprt.c 2017-04-18 17:38:08.242651106 +0200
  29019. @@ -340,7 +340,7 @@
  29020. goto out;
  29021. }
  29022. - cpu = get_cpu();
  29023. + cpu = get_cpu_light();
  29024. pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
  29025. atomic_long_inc(&pool->sp_stats.packets);
  29026. @@ -376,7 +376,7 @@
  29027. atomic_long_inc(&pool->sp_stats.threads_woken);
  29028. wake_up_process(rqstp->rq_task);
  29029. - put_cpu();
  29030. + put_cpu_light();
  29031. goto out;
  29032. }
  29033. rcu_read_unlock();
  29034. @@ -397,7 +397,7 @@
  29035. goto redo_search;
  29036. }
  29037. rqstp = NULL;
  29038. - put_cpu();
  29039. + put_cpu_light();
  29040. out:
  29041. trace_svc_xprt_do_enqueue(xprt, rqstp);
  29042. }
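get_cpu_light()/put_cpu_light() are used where the code only needs a stable CPU number and its per-CPU data, not a fully preemption-disabled section (svc_xprt_do_enqueue() may even wake a task in between). Their assumed definition, provided elsewhere in this patch (sketch only): pin the task to its current CPU via migrate_disable() while staying preemptible; with RT disabled, migrate_disable() falls back to preempt_disable(), so the behaviour roughly matches get_cpu()/put_cpu().

#define get_cpu_light()         ({ migrate_disable(); smp_processor_id(); })
#define put_cpu_light()         migrate_enable()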
  29043. diff -Nur linux-4.4.62.orig/scripts/mkcompile_h linux-4.4.62/scripts/mkcompile_h
  29044. --- linux-4.4.62.orig/scripts/mkcompile_h 2017-04-18 07:15:37.000000000 +0200
  29045. +++ linux-4.4.62/scripts/mkcompile_h 2017-04-18 17:38:08.242651106 +0200
  29046. @@ -4,7 +4,8 @@
  29047. ARCH=$2
  29048. SMP=$3
  29049. PREEMPT=$4
  29050. -CC=$5
  29051. +RT=$5
  29052. +CC=$6
  29053. vecho() { [ "${quiet}" = "silent_" ] || echo "$@" ; }
  29054. @@ -57,6 +58,7 @@
  29055. CONFIG_FLAGS=""
  29056. if [ -n "$SMP" ] ; then CONFIG_FLAGS="SMP"; fi
  29057. if [ -n "$PREEMPT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT"; fi
  29058. +if [ -n "$RT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS RT"; fi
  29059. UTS_VERSION="$UTS_VERSION $CONFIG_FLAGS $TIMESTAMP"
  29060. # Truncate to maximum length
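With this change mkcompile_h takes the RT preemption flag as a separate argument ahead of the compiler, so a kernel built with CONFIG_PREEMPT_RT_FULL can advertise it in the compile banner; assuming the kbuild caller is updated accordingly elsewhere in this patch, `uname -v` on such a kernel would show a version string roughly like "#1 SMP PREEMPT RT <build date>".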
  29061. diff -Nur linux-4.4.62.orig/sound/core/pcm_native.c linux-4.4.62/sound/core/pcm_native.c
  29062. --- linux-4.4.62.orig/sound/core/pcm_native.c 2017-04-18 07:15:37.000000000 +0200
  29063. +++ linux-4.4.62/sound/core/pcm_native.c 2017-04-18 17:38:08.242651106 +0200
  29064. @@ -135,7 +135,7 @@
  29065. void snd_pcm_stream_lock_irq(struct snd_pcm_substream *substream)
  29066. {
  29067. if (!substream->pcm->nonatomic)
  29068. - local_irq_disable();
  29069. + local_irq_disable_nort();
  29070. snd_pcm_stream_lock(substream);
  29071. }
  29072. EXPORT_SYMBOL_GPL(snd_pcm_stream_lock_irq);
  29073. @@ -150,7 +150,7 @@
  29074. {
  29075. snd_pcm_stream_unlock(substream);
  29076. if (!substream->pcm->nonatomic)
  29077. - local_irq_enable();
  29078. + local_irq_enable_nort();
  29079. }
  29080. EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irq);
  29081. @@ -158,7 +158,7 @@
  29082. {
  29083. unsigned long flags = 0;
  29084. if (!substream->pcm->nonatomic)
  29085. - local_irq_save(flags);
  29086. + local_irq_save_nort(flags);
  29087. snd_pcm_stream_lock(substream);
  29088. return flags;
  29089. }
  29090. @@ -176,7 +176,7 @@
  29091. {
  29092. snd_pcm_stream_unlock(substream);
  29093. if (!substream->pcm->nonatomic)
  29094. - local_irq_restore(flags);
  29095. + local_irq_restore_nort(flags);
  29096. }
  29097. EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irqrestore);
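The *_nort() variants keep the IRQ-off protection for atomic PCM streams on !RT but drop it on PREEMPT_RT_FULL, where snd_pcm_stream_lock() ends up taking a sleeping lock that must not be acquired with interrupts disabled. Their assumed shape, defined elsewhere in this patch (sketch only, roughly):

#ifdef CONFIG_PREEMPT_RT_FULL
# define local_irq_disable_nort()       do { } while (0)
# define local_irq_enable_nort()        do { } while (0)
# define local_irq_save_nort(flags)     local_save_flags(flags)
# define local_irq_restore_nort(flags)  (void)(flags)
#else
# define local_irq_disable_nort()       local_irq_disable()
# define local_irq_enable_nort()        local_irq_enable()
# define local_irq_save_nort(flags)     local_irq_save(flags)
# define local_irq_restore_nort(flags)  local_irq_restore(flags)
#endif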
  29098. diff -Nur linux-4.4.62.orig/virt/kvm/async_pf.c linux-4.4.62/virt/kvm/async_pf.c
  29099. --- linux-4.4.62.orig/virt/kvm/async_pf.c 2017-04-18 07:15:37.000000000 +0200
  29100. +++ linux-4.4.62/virt/kvm/async_pf.c 2017-04-18 17:38:08.242651106 +0200
  29101. @@ -98,8 +98,8 @@
  29102. * This memory barrier pairs with prepare_to_wait's set_current_state()
  29103. */
  29104. smp_mb();
  29105. - if (waitqueue_active(&vcpu->wq))
  29106. - wake_up_interruptible(&vcpu->wq);
  29107. + if (swait_active(&vcpu->wq))
  29108. + swake_up(&vcpu->wq);
  29109. mmput(mm);
  29110. kvm_put_kvm(vcpu->kvm);
  29111. diff -Nur linux-4.4.62.orig/virt/kvm/kvm_main.c linux-4.4.62/virt/kvm/kvm_main.c
  29112. --- linux-4.4.62.orig/virt/kvm/kvm_main.c 2017-04-18 07:15:37.000000000 +0200
  29113. +++ linux-4.4.62/virt/kvm/kvm_main.c 2017-04-18 17:38:08.242651106 +0200
  29114. @@ -228,8 +228,7 @@
  29115. vcpu->kvm = kvm;
  29116. vcpu->vcpu_id = id;
  29117. vcpu->pid = NULL;
  29118. - vcpu->halt_poll_ns = 0;
  29119. - init_waitqueue_head(&vcpu->wq);
  29120. + init_swait_queue_head(&vcpu->wq);
  29121. kvm_async_pf_vcpu_init(vcpu);
  29122. vcpu->pre_pcpu = -1;
  29123. @@ -2008,7 +2007,7 @@
  29124. void kvm_vcpu_block(struct kvm_vcpu *vcpu)
  29125. {
  29126. ktime_t start, cur;
  29127. - DEFINE_WAIT(wait);
  29128. + DECLARE_SWAITQUEUE(wait);
  29129. bool waited = false;
  29130. u64 block_ns;
  29131. @@ -2033,7 +2032,7 @@
  29132. kvm_arch_vcpu_blocking(vcpu);
  29133. for (;;) {
  29134. - prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
  29135. + prepare_to_swait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
  29136. if (kvm_vcpu_check_block(vcpu) < 0)
  29137. break;
  29138. @@ -2042,7 +2041,7 @@
  29139. schedule();
  29140. }
  29141. - finish_wait(&vcpu->wq, &wait);
  29142. + finish_swait(&vcpu->wq, &wait);
  29143. cur = ktime_get();
  29144. kvm_arch_vcpu_unblocking(vcpu);
  29145. @@ -2074,11 +2073,11 @@
  29146. {
  29147. int me;
  29148. int cpu = vcpu->cpu;
  29149. - wait_queue_head_t *wqp;
  29150. + struct swait_queue_head *wqp;
  29151. wqp = kvm_arch_vcpu_wq(vcpu);
  29152. - if (waitqueue_active(wqp)) {
  29153. - wake_up_interruptible(wqp);
  29154. + if (swait_active(wqp)) {
  29155. + swake_up(wqp);
  29156. ++vcpu->stat.halt_wakeup;
  29157. }
  29158. @@ -2179,7 +2178,7 @@
  29159. continue;
  29160. if (vcpu == me)
  29161. continue;
  29162. - if (waitqueue_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
  29163. + if (swait_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
  29164. continue;
  29165. if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
  29166. continue;
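The conversion above moves the vcpu wait queue from wait_queue_head_t to the simple waitqueue API (<linux/swait.h>, added/backported by this patch): swait queues are protected by a raw spinlock and swake_up() wakes at most one waiter, which keeps the wakeup side usable from the preempt- and IRQ-disabled paths KVM calls it from on RT. The wait/wake pattern as used in kvm_vcpu_block()/kvm_vcpu_kick(), reduced to a generic sketch (demo_* names illustrative):

#include <linux/swait.h>
#include <linux/sched.h>

static DECLARE_SWAIT_QUEUE_HEAD(demo_wq);
static bool demo_cond;

static void demo_wait(void)
{
        DECLARE_SWAITQUEUE(wait);

        for (;;) {
                prepare_to_swait(&demo_wq, &wait, TASK_INTERRUPTIBLE);
                if (demo_cond)
                        break;
                schedule();
        }
        finish_swait(&demo_wq, &wait);
}

static void demo_wake(void)
{
        demo_cond = true;
        smp_mb();       /* pairs with the state store in prepare_to_swait() */
        if (swait_active(&demo_wq))
                swake_up(&demo_wq);
}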