mptcp.patch 487 KB

430914310143111431214313143141431514316143171431814319143201432114322143231432414325143261432714328143291433014331143321433314334143351433614337143381433914340143411434214343143441434514346143471434814349143501435114352143531435414355143561435714358143591436014361143621436314364143651436614367143681436914370143711437214373143741437514376143771437814379143801438114382143831438414385143861438714388143891439014391143921439314394143951439614397143981439914400144011440214403144041440514406144071440814409144101441114412144131441414415144161441714418144191442014421144221442314424144251442614427144281442914430144311443214433144341443514436144371443814439144401444114442144431444414445144461444714448144491445014451144521445314454144551445614457144581445914460144611446214463144641446514466144671446814469144701447114472144731447414475144761447714478144791448014481144821448314484144851448614487144881448914490144911449214493144941449514496144971449814499145001450114502145031450414505145061450714508145091451014511145121451314514145151451614517145181451914520145211452214523145241452514526145271452814529145301453114532145331453414535145361453714538145391454014541145421454314544145451454614547145481454914550145511455214553145541455514556145571455814559145601456114562145631456414565145661456714568145691457014571145721457314574145751457614577145781457914580145811458214583145841458514586145871458814589145901459114592145931459414595145961459714598145991460014601146021460314604146051460614607146081460914610146111461214613146141461514616146171461814619146201462114622146231462414625146261462714628146291463014631146321463314634146351463614637146381463914640146411464214643146441464514646146471464814649146501465114652146531465414655146561465714658146591466014661146621466314664146651466614667146681466914670146711467214673146741467514676146771467814679146801468114682146831468414685146861468714688146891469014691146921469314694146951469614697146981469914700147011470214703147041470514706147071470814709147101471114712147131471414715147161471714718147191472014721147221472314724147251472614727147281472914730147311473214733147341473514736147371473814739147401474114742147431474414745147461474714748147491475014751147521475314754147551475614757147581475914760147611476214763147641476514766147671476814769147701477114772147731477414775147761477714778147791478014781147821478314784147851478614787147881478914790147911479214793147941479514796147971479814799148001480114802148031480414805148061480714808148091481014811148121481314814148151481614817148181481914820148211482214823148241482514826148271482814829148301483114832148331483414835148361483714838148391484014841148421484314844148451484614847148481484914850148511485214853148541485514856148571485814859148601486114862148631486414865148661486714868148691487014871148721487314874148751487614877148781487914880148811488214883148841488514886148871488814889148901489114892148931489414895148961489714898148991490014901149021490314904149051490614907149081490914910149111491214913149141491514916149171491814919149201492114922149231492414925149261492714928149291493014931149321493314934149351493614937149381493914940149411494214943149441494514946149471494814949149501495114952149531495414955149561495714958149591496014961149621496314964149651496614967149681496914970149711497214973149741497514976149771497814979149801498114982149831498414985149861498714988149891499014991149921499314994149951499614997149981499915000150011500215003150041500515006150071500815009150101501115012150131501415015150161501715018150191
502015021150221502315024150251502615027150281502915030150311503215033150341503515036150371503815039150401504115042150431504415045150461504715048150491505015051150521505315054150551505615057150581505915060150611506215063150641506515066150671506815069150701507115072150731507415075150761507715078150791508015081150821508315084150851508615087150881508915090150911509215093150941509515096150971509815099151001510115102151031510415105151061510715108151091511015111151121511315114151151511615117151181511915120151211512215123151241512515126151271512815129151301513115132151331513415135151361513715138151391514015141151421514315144151451514615147151481514915150151511515215153151541515515156151571515815159151601516115162151631516415165151661516715168151691517015171151721517315174151751517615177151781517915180151811518215183151841518515186151871518815189151901519115192151931519415195151961519715198151991520015201152021520315204152051520615207152081520915210152111521215213152141521515216152171521815219152201522115222152231522415225152261522715228152291523015231152321523315234152351523615237152381523915240152411524215243152441524515246152471524815249152501525115252152531525415255152561525715258152591526015261152621526315264152651526615267152681526915270152711527215273152741527515276152771527815279152801528115282152831528415285152861528715288152891529015291152921529315294152951529615297152981529915300153011530215303153041530515306153071530815309153101531115312153131531415315153161531715318153191532015321153221532315324153251532615327153281532915330153311533215333153341533515336153371533815339153401534115342153431534415345153461534715348153491535015351153521535315354153551535615357153581535915360153611536215363153641536515366153671536815369153701537115372153731537415375153761537715378153791538015381153821538315384153851538615387153881538915390153911539215393153941539515396153971539815399154001540115402154031540415405154061540715408154091541015411154121541315414154151541615417154181541915420154211542215423154241542515426154271542815429154301543115432154331543415435154361543715438154391544015441154421544315444154451544615447154481544915450154511545215453154541545515456154571545815459154601546115462154631546415465154661546715468154691547015471154721547315474154751547615477154781547915480154811548215483154841548515486154871548815489154901549115492154931549415495154961549715498154991550015501155021550315504155051550615507155081550915510155111551215513155141551515516155171551815519155201552115522155231552415525155261552715528155291553015531155321553315534155351553615537155381553915540155411554215543155441554515546155471554815549155501555115552155531555415555155561555715558155591556015561155621556315564155651556615567155681556915570155711557215573155741557515576155771557815579155801558115582155831558415585155861558715588155891559015591155921559315594155951559615597155981559915600156011560215603156041560515606156071560815609156101561115612156131561415615156161561715618156191562015621156221562315624156251562615627156281562915630156311563215633156341563515636156371563815639156401564115642156431564415645156461564715648156491565015651156521565315654156551565615657156581565915660156611566215663156641566515666156671566815669156701567115672156731567415675156761567715678156791568015681156821568315684156851568615687156881568915690156911569215693156941569515696156971569815699157001570115702157031570415705157061570715708157091571015711157121571315714157151571615717157181571915720157211572215723157241572515726157271572815729157301
573115732157331573415735157361573715738157391574015741157421574315744157451574615747157481574915750157511575215753157541575515756157571575815759157601576115762157631576415765157661576715768157691577015771157721577315774157751577615777157781577915780157811578215783157841578515786157871578815789157901579115792157931579415795157961579715798157991580015801158021580315804158051580615807158081580915810158111581215813158141581515816158171581815819158201582115822158231582415825158261582715828158291583015831158321583315834158351583615837158381583915840158411584215843158441584515846158471584815849158501585115852158531585415855158561585715858158591586015861158621586315864158651586615867158681586915870158711587215873158741587515876158771587815879158801588115882158831588415885158861588715888158891589015891158921589315894158951589615897158981589915900159011590215903159041590515906159071590815909159101591115912159131591415915159161591715918159191592015921159221592315924159251592615927159281592915930159311593215933159341593515936159371593815939159401594115942159431594415945159461594715948159491595015951159521595315954159551595615957159581595915960159611596215963159641596515966159671596815969159701597115972159731597415975159761597715978159791598015981159821598315984159851598615987159881598915990159911599215993159941599515996159971599815999160001600116002160031600416005160061600716008160091601016011160121601316014160151601616017160181601916020160211602216023160241602516026160271602816029160301603116032160331603416035160361603716038160391604016041160421604316044160451604616047160481604916050160511605216053160541605516056160571605816059160601606116062160631606416065160661606716068160691607016071160721607316074160751607616077160781607916080160811608216083160841608516086160871608816089160901609116092160931609416095160961609716098160991610016101161021610316104161051610616107161081610916110161111611216113161141611516116161171611816119161201612116122161231612416125161261612716128161291613016131161321613316134161351613616137161381613916140161411614216143161441614516146161471614816149161501615116152161531615416155161561615716158161591616016161161621616316164161651616616167161681616916170161711617216173161741617516176161771617816179161801618116182161831618416185161861618716188161891619016191161921619316194161951619616197161981619916200162011620216203162041620516206162071620816209162101621116212162131621416215162161621716218162191622016221162221622316224162251622616227162281622916230162311623216233162341623516236162371623816239162401624116242162431624416245162461624716248162491625016251162521625316254162551625616257162581625916260162611626216263162641626516266162671626816269162701627116272162731627416275162761627716278162791628016281162821628316284162851628616287162881628916290162911629216293162941629516296162971629816299163001630116302163031630416305163061630716308163091631016311163121631316314163151631616317163181631916320163211632216323163241632516326163271632816329163301633116332163331633416335163361633716338163391634016341163421634316344163451634616347163481634916350163511635216353163541635516356163571635816359163601636116362163631636416365163661636716368163691637016371163721637316374163751637616377163781637916380163811638216383163841638516386163871638816389163901639116392163931639416395163961639716398163991640016401164021640316404164051640616407164081640916410164111641216413164141641516416164171641816419164201642116422164231642416425164261642716428164291643016431164321643316434164351643616437164381643916440164411
644216443164441644516446164471644816449164501645116452164531645416455164561645716458164591646016461164621646316464164651646616467164681646916470164711647216473164741647516476164771647816479164801648116482164831648416485164861648716488164891649016491164921649316494164951649616497164981649916500165011650216503165041650516506165071650816509165101651116512165131651416515165161651716518165191652016521165221652316524165251652616527165281652916530165311653216533165341653516536165371653816539165401654116542165431654416545165461654716548165491655016551165521655316554165551655616557165581655916560165611656216563165641656516566165671656816569165701657116572165731657416575165761657716578165791658016581165821658316584165851658616587165881658916590165911659216593165941659516596165971659816599166001660116602166031660416605166061660716608166091661016611166121661316614166151661616617166181661916620166211662216623166241662516626166271662816629166301663116632166331663416635166361663716638166391664016641166421664316644166451664616647166481664916650166511665216653166541665516656166571665816659166601666116662166631666416665166661666716668166691667016671166721667316674166751667616677166781667916680166811668216683166841668516686166871668816689166901669116692166931669416695166961669716698166991670016701167021670316704167051670616707167081670916710167111671216713167141671516716167171671816719167201672116722167231672416725167261672716728167291673016731167321673316734167351673616737167381673916740167411674216743167441674516746167471674816749167501675116752167531675416755167561675716758167591676016761167621676316764167651676616767167681676916770167711677216773167741677516776167771677816779167801678116782167831678416785167861678716788167891679016791167921679316794167951679616797167981679916800168011680216803168041680516806168071680816809168101681116812168131681416815168161681716818168191682016821168221682316824168251682616827168281682916830168311683216833168341683516836168371683816839168401684116842168431684416845168461684716848168491685016851168521685316854168551685616857168581685916860168611686216863168641686516866168671686816869168701687116872168731687416875168761687716878168791688016881168821688316884168851688616887168881688916890168911689216893168941689516896168971689816899169001690116902169031690416905169061690716908169091691016911169121691316914169151691616917169181691916920169211692216923169241692516926169271692816929169301693116932169331693416935169361693716938169391694016941169421694316944169451694616947169481694916950169511695216953169541695516956169571695816959169601696116962169631696416965169661696716968169691697016971169721697316974169751697616977169781697916980169811698216983169841698516986169871698816989169901699116992169931699416995169961699716998169991700017001170021700317004170051700617007170081700917010170111701217013170141701517016170171701817019170201702117022170231702417025170261702717028170291703017031170321703317034170351703617037170381703917040170411704217043170441704517046170471704817049170501705117052170531705417055170561705717058170591706017061170621706317064170651706617067170681706917070170711707217073170741707517076170771707817079170801708117082170831708417085170861708717088170891709017091170921709317094170951709617097170981709917100171011710217103171041710517106171071710817109171101711117112171131711417115171161711717118171191712017121171221712317124171251712617127171281712917130171311713217133
  1. diff -Nur linux-3.14.45.orig/drivers/infiniband/hw/cxgb4/cm.c linux-3.14.45/drivers/infiniband/hw/cxgb4/cm.c
  2. --- linux-3.14.45.orig/drivers/infiniband/hw/cxgb4/cm.c 2015-06-23 02:01:36.000000000 +0200
  3. +++ linux-3.14.45/drivers/infiniband/hw/cxgb4/cm.c 2015-06-24 14:15:48.871862463 +0200
  4. @@ -3162,7 +3162,7 @@
  5. */
  6. memset(&tmp_opt, 0, sizeof(tmp_opt));
  7. tcp_clear_options(&tmp_opt);
  8. - tcp_parse_options(skb, &tmp_opt, 0, NULL);
  9. + tcp_parse_options(skb, &tmp_opt, NULL, 0, NULL);
  10. req = (struct cpl_pass_accept_req *)__skb_push(skb, sizeof(*req));
  11. memset(req, 0, sizeof(*req));
  12. diff -Nur linux-3.14.45.orig/include/linux/ipv6.h linux-3.14.45/include/linux/ipv6.h
  13. --- linux-3.14.45.orig/include/linux/ipv6.h 2015-06-23 02:01:36.000000000 +0200
  14. +++ linux-3.14.45/include/linux/ipv6.h 2015-06-24 14:15:48.871862463 +0200
  15. @@ -309,12 +309,6 @@
  16. return NULL;
  17. }
  18. -static inline struct inet6_request_sock *
  19. - inet6_rsk(const struct request_sock *rsk)
  20. -{
  21. - return NULL;
  22. -}
  23. -
  24. static inline struct raw6_sock *raw6_sk(const struct sock *sk)
  25. {
  26. return NULL;
  27. diff -Nur linux-3.14.45.orig/include/linux/tcp.h linux-3.14.45/include/linux/tcp.h
  28. --- linux-3.14.45.orig/include/linux/tcp.h 2015-06-23 02:01:36.000000000 +0200
  29. +++ linux-3.14.45/include/linux/tcp.h 2015-06-24 14:15:48.871862463 +0200
  30. @@ -72,6 +72,53 @@
  31. u32 end_seq;
  32. };
  33. +struct tcp_out_options {
  34. + u16 options; /* bit field of OPTION_* */
  35. + u8 ws; /* window scale, 0 to disable */
  36. + u8 num_sack_blocks;/* number of SACK blocks to include */
  37. + u8 hash_size; /* bytes in hash_location */
  38. + u16 mss; /* 0 to disable */
  39. + __u8 *hash_location; /* temporary pointer, overloaded */
  40. + __u32 tsval, tsecr; /* need to include OPTION_TS */
  41. + struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */
  42. +#ifdef CONFIG_MPTCP
  43. + u16 mptcp_options; /* bit field of MPTCP related OPTION_* */
  44. + u8 dss_csum:1,
  45. + add_addr_v4:1,
  46. + add_addr_v6:1; /* dss-checksum required? */
  47. +
  48. + __u32 data_seq; /* data sequence number, for MPTCP */
  49. + __u32 data_ack; /* data ack, for MPTCP */
  50. +
  51. + union {
  52. + struct {
  53. + __u64 sender_key; /* sender's key for mptcp */
  54. + __u64 receiver_key; /* receiver's key for mptcp */
  55. + } mp_capable;
  56. +
  57. + struct {
  58. + __u64 sender_truncated_mac;
  59. + __u32 sender_nonce;
  60. + /* random number of the sender */
  61. + __u32 token; /* token for mptcp */
  62. + } mp_join_syns;
  63. + };
  64. +
  65. + struct {
  66. + struct in_addr addr;
  67. + u8 addr_id;
  68. + } add_addr4;
  69. +
  70. + struct {
  71. + struct in6_addr addr;
  72. + u8 addr_id;
  73. + } add_addr6;
  74. +
  75. + u16 remove_addrs; /* list of address id */
  76. + u8 addr_id; /* address id (mp_join or add_address) */
  77. +#endif /* CONFIG_MPTCP */
  78. +};
  79. +
  80. /*These are used to set the sack_ok field in struct tcp_options_received */
  81. #define TCP_SACK_SEEN (1 << 0) /*1 = peer is SACK capable, */
  82. #define TCP_FACK_ENABLED (1 << 1) /*1 = FACK is enabled locally*/
  83. @@ -95,6 +142,9 @@
  84. u16 mss_clamp; /* Maximal mss, negotiated at connection setup */
  85. };
  86. +struct mptcp_cb;
  87. +struct mptcp_tcp_sock;
  88. +
  89. static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
  90. {
  91. rx_opt->tstamp_ok = rx_opt->sack_ok = 0;
  92. @@ -123,6 +173,7 @@
  93. * FastOpen it's the seq#
  94. * after data-in-SYN.
  95. */
  96. + u8 saw_mpc:1;
  97. };
  98. static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
  99. @@ -130,6 +181,8 @@
  100. return (struct tcp_request_sock *)req;
  101. }
  102. +struct tcp_md5sig_key;
  103. +
  104. struct tcp_sock {
  105. /* inet_connection_sock has to be the first member of tcp_sock */
  106. struct inet_connection_sock inet_conn;
  107. @@ -323,6 +376,45 @@
  108. * socket. Used to retransmit SYNACKs etc.
  109. */
  110. struct request_sock *fastopen_rsk;
  111. +
  112. +
  113. + struct mptcp_cb *mpcb;
  114. + struct sock *meta_sk;
  115. + /* We keep these flags even if CONFIG_MPTCP is not checked, because
  116. + * it allows checking MPTCP capability just by checking the mpc flag,
  117. + * rather than adding ifdefs everywhere.
  118. + */
  119. + u16 mpc:1, /* Other end is multipath capable */
  120. + inside_tk_table:1, /* Is the tcp_sock inside the token-table? */
  121. + send_mp_fclose:1,
  122. + request_mptcp:1, /* Did we send out an MP_CAPABLE?
  123. + * (this speeds up mptcp_doit() in tcp_recvmsg)
  124. + */
  125. + mptcp_enabled:1, /* Is MPTCP enabled from the application ? */
  126. + pf:1, /* Potentially Failed state: when this flag is set, we
  127. + * stop using the subflow
  128. + */
  129. + mp_killed:1, /* Killed with a tcp_done in mptcp? */
  130. + was_meta_sk:1, /* This was a meta sk (in case of reuse) */
  131. + close_it:1, /* Must close socket in mptcp_data_ready? */
  132. + closing:1;
  133. + struct mptcp_tcp_sock *mptcp;
  134. +#ifdef CONFIG_MPTCP
  135. + struct hlist_nulls_node tk_table;
  136. + u32 mptcp_loc_token;
  137. + u64 mptcp_loc_key;
  138. +#endif /* CONFIG_MPTCP */
  139. +
  140. + /* Functions that depend on the value of the mpc flag */
  141. + u32 (*__select_window)(struct sock *sk);
  142. + u16 (*select_window)(struct sock *sk);
  143. + void (*select_initial_window)(int __space, __u32 mss, __u32 *rcv_wnd,
  144. + __u32 *window_clamp, int wscale_ok,
  145. + __u8 *rcv_wscale, __u32 init_rcv_wnd,
  146. + const struct sock *sk);
  147. + void (*init_buffer_space)(struct sock *sk);
  148. + void (*set_rto)(struct sock *sk);
  149. + bool (*should_expand_sndbuf)(const struct sock *sk);
  150. };
  151. enum tsq_flags {
  152. @@ -334,6 +426,8 @@
  153. TCP_MTU_REDUCED_DEFERRED, /* tcp_v{4|6}_err() could not call
  154. * tcp_v{4|6}_mtu_reduced()
  155. */
  156. + MPTCP_PATH_MANAGER, /* MPTCP deferred creation of new subflows */
  157. + MPTCP_SUB_DEFERRED, /* A subflow got deferred - process them */
  158. };
  159. static inline struct tcp_sock *tcp_sk(const struct sock *sk)
  160. @@ -352,6 +446,7 @@
  161. #ifdef CONFIG_TCP_MD5SIG
  162. struct tcp_md5sig_key *tw_md5_key;
  163. #endif
  164. + struct mptcp_tw *mptcp_tw;
  165. };
  166. static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk)
  167. diff -Nur linux-3.14.45.orig/include/net/inet6_connection_sock.h linux-3.14.45/include/net/inet6_connection_sock.h
  168. --- linux-3.14.45.orig/include/net/inet6_connection_sock.h 2015-06-23 02:01:36.000000000 +0200
  169. +++ linux-3.14.45/include/net/inet6_connection_sock.h 2015-06-24 14:15:48.871862463 +0200
  170. @@ -27,6 +27,8 @@
  171. struct dst_entry *inet6_csk_route_req(struct sock *sk, struct flowi6 *fl6,
  172. const struct request_sock *req);
  173. +u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport,
  174. + const u32 rnd, const u32 synq_hsize);
  175. struct request_sock *inet6_csk_search_req(const struct sock *sk,
  176. struct request_sock ***prevp,
  177. diff -Nur linux-3.14.45.orig/include/net/inet_common.h linux-3.14.45/include/net/inet_common.h
  178. --- linux-3.14.45.orig/include/net/inet_common.h 2015-06-23 02:01:36.000000000 +0200
  179. +++ linux-3.14.45/include/net/inet_common.h 2015-06-24 14:15:48.871862463 +0200
  180. @@ -1,6 +1,8 @@
  181. #ifndef _INET_COMMON_H
  182. #define _INET_COMMON_H
  183. +#include <net/sock.h>
  184. +
  185. extern const struct proto_ops inet_stream_ops;
  186. extern const struct proto_ops inet_dgram_ops;
  187. @@ -13,6 +15,8 @@
  188. struct sockaddr;
  189. struct socket;
  190. +int inet_create(struct net *net, struct socket *sock, int protocol, int kern);
  191. +int inet6_create(struct net *net, struct socket *sock, int protocol, int kern);
  192. int inet_release(struct socket *sock);
  193. int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
  194. int addr_len, int flags);
  195. diff -Nur linux-3.14.45.orig/include/net/inet_connection_sock.h linux-3.14.45/include/net/inet_connection_sock.h
  196. --- linux-3.14.45.orig/include/net/inet_connection_sock.h 2015-06-23 02:01:36.000000000 +0200
  197. +++ linux-3.14.45/include/net/inet_connection_sock.h 2015-06-24 14:15:48.871862463 +0200
  198. @@ -244,6 +244,9 @@
  199. struct sock *inet_csk_accept(struct sock *sk, int flags, int *err);
  200. +u32 inet_synq_hash(const __be32 raddr, const __be16 rport, const u32 rnd,
  201. + const u32 synq_hsize);
  202. +
  203. struct request_sock *inet_csk_search_req(const struct sock *sk,
  204. struct request_sock ***prevp,
  205. const __be16 rport,
  206. diff -Nur linux-3.14.45.orig/include/net/mptcp.h linux-3.14.45/include/net/mptcp.h
  207. --- linux-3.14.45.orig/include/net/mptcp.h 1970-01-01 01:00:00.000000000 +0100
  208. +++ linux-3.14.45/include/net/mptcp.h 2015-06-24 14:15:48.871862463 +0200
  209. @@ -0,0 +1,1471 @@
  210. +/*
  211. + * MPTCP implementation
  212. + *
  213. + * Initial Design & Implementation:
  214. + * Sébastien Barré <sebastien.barre@uclouvain.be>
  215. + *
  216. + * Current Maintainer & Author:
  217. + * Christoph Paasch <christoph.paasch@uclouvain.be>
  218. + *
  219. + * Additional authors:
  220. + * Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
  221. + * Gregory Detal <gregory.detal@uclouvain.be>
  222. + * Fabien Duchêne <fabien.duchene@uclouvain.be>
  223. + * Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
  224. + * Lavkesh Lahngir <lavkesh51@gmail.com>
  225. + * Andreas Ripke <ripke@neclab.eu>
  226. + * Vlad Dogaru <vlad.dogaru@intel.com>
  227. + * Octavian Purdila <octavian.purdila@intel.com>
  228. + * John Ronan <jronan@tssg.org>
  229. + * Catalin Nicutar <catalin.nicutar@gmail.com>
  230. + * Brandon Heller <brandonh@stanford.edu>
  231. + *
  232. + *
  233. + * This program is free software; you can redistribute it and/or
  234. + * modify it under the terms of the GNU General Public License
  235. + * as published by the Free Software Foundation; either version
  236. + * 2 of the License, or (at your option) any later version.
  237. + */
  238. +
  239. +#ifndef _MPTCP_H
  240. +#define _MPTCP_H
  241. +
  242. +#include <linux/inetdevice.h>
  243. +#include <linux/ipv6.h>
  244. +#include <linux/list.h>
  245. +#include <linux/net.h>
  246. +#include <linux/netpoll.h>
  247. +#include <linux/skbuff.h>
  248. +#include <linux/socket.h>
  249. +#include <linux/tcp.h>
  250. +#include <linux/kernel.h>
  251. +
  252. +#include <asm/byteorder.h>
  253. +#include <asm/unaligned.h>
  254. +#include <crypto/hash.h>
  255. +#include <net/tcp.h>
  256. +
  257. +#if defined(__LITTLE_ENDIAN_BITFIELD)
  258. + #define ntohll(x) be64_to_cpu(x)
  259. + #define htonll(x) cpu_to_be64(x)
  260. +#elif defined(__BIG_ENDIAN_BITFIELD)
  261. + #define ntohll(x) (x)
  262. + #define htonll(x) (x)
  263. +#endif
  264. +
  265. +/* Max number of local or remote addresses we can store.
  266. + * When changing, see the bitfield below in mptcp_loc4/6. */
  267. +#define MPTCP_MAX_ADDR 8
  268. +
  269. +#define MPTCP_SUBFLOW_RETRY_DELAY 1000
  270. +
  271. +struct mptcp_loc4 {
  272. + u8 loc4_id;
  273. + u8 low_prio:1;
  274. + struct in_addr addr;
  275. +};
  276. +
  277. +struct mptcp_rem4 {
  278. + u8 rem4_id;
  279. + u8 bitfield;
  280. + u8 retry_bitfield;
  281. + __be16 port;
  282. + struct in_addr addr;
  283. +};
  284. +
  285. +struct mptcp_loc6 {
  286. + u8 loc6_id;
  287. + u8 low_prio:1;
  288. + struct in6_addr addr;
  289. +};
  290. +
  291. +struct mptcp_rem6 {
  292. + u8 rem6_id;
  293. + u8 bitfield;
  294. + u8 retry_bitfield;
  295. + __be16 port;
  296. + struct in6_addr addr;
  297. +};
  298. +
  299. +struct mptcp_request_sock {
  300. + struct tcp_request_sock req;
  301. + struct mptcp_cb *mpcb;
  302. + /* Collision list in the tuple hashtable. We need to find
  303. + * the req sock when receiving the third msg of the 3-way handshake,
  304. + * since that one does not contain the token. If this makes
  305. + * the request sock too long, we can use kmalloc'ed specific entries for
  306. + * that tuple hashtable. At the moment, though, I extend the
  307. + * request_sock.
  308. + */
  309. + struct list_head collide_tuple;
  310. + struct hlist_nulls_node collide_tk;
  311. + u32 mptcp_rem_nonce;
  312. + u32 mptcp_loc_token;
  313. + u64 mptcp_loc_key;
  314. + u64 mptcp_rem_key;
  315. + u64 mptcp_hash_tmac;
  316. + u32 mptcp_loc_nonce;
  317. + u8 loc_id;
  318. + u8 rem_id; /* Address-id in the MP_JOIN */
  319. + u8 dss_csum:1,
  320. + low_prio:1;
  321. +};
  322. +
  323. +struct mptcp_options_received {
  324. + u16 saw_mpc:1,
  325. + dss_csum:1,
  326. + drop_me:1,
  327. +
  328. + is_mp_join:1,
  329. + join_ack:1,
  330. +
  331. + saw_low_prio:2, /* 0x1 - low-prio set for this subflow
  332. + * 0x2 - low-prio set for another subflow
  333. + */
  334. + low_prio:1,
  335. +
  336. + saw_add_addr:2, /* Saw at least one add_addr option:
  337. + * 0x1: IPv4 - 0x2: IPv6
  338. + */
  339. + more_add_addr:1, /* Saw one more add-addr. */
  340. +
  341. + saw_rem_addr:1, /* Saw at least one rem_addr option */
  342. + more_rem_addr:1, /* Saw one more rem-addr. */
  343. +
  344. + mp_fail:1,
  345. + mp_fclose:1;
  346. + u8 rem_id; /* Address-id in the MP_JOIN */
  347. + u8 prio_addr_id; /* Address-id in the MP_PRIO */
  348. +
  349. + const unsigned char *add_addr_ptr; /* Pointer to add-address option */
  350. + const unsigned char *rem_addr_ptr; /* Pointer to rem-address option */
  351. +
  352. + u32 data_ack;
  353. + u32 data_seq;
  354. + u16 data_len;
  355. +
  356. + u32 mptcp_rem_token;/* Remote token */
  357. +
  358. + /* Key inside the option (from mp_capable or fast_close) */
  359. + u64 mptcp_key;
  360. +
  361. + u32 mptcp_recv_nonce;
  362. + u64 mptcp_recv_tmac;
  363. + u8 mptcp_recv_mac[20];
  364. +};
  365. +
  366. +struct mptcp_tcp_sock {
  367. + struct tcp_sock *next; /* Next subflow socket */
  368. + struct list_head cb_list;
  369. + struct mptcp_options_received rx_opt;
  370. +
  371. + /* Those three fields record the current mapping */
  372. + u64 map_data_seq;
  373. + u32 map_subseq;
  374. + u16 map_data_len;
  375. + u16 slave_sk:1,
  376. + fully_established:1,
  377. + establish_increased:1,
  378. + second_packet:1,
  379. + attached:1,
  380. + send_mp_fail:1,
  381. + include_mpc:1,
  382. + mapping_present:1,
  383. + map_data_fin:1,
  384. + low_prio:1, /* use this socket as backup */
  385. + rcv_low_prio:1, /* Peer sent low-prio option to us */
  386. + send_mp_prio:1, /* Trigger to send mp_prio on this socket */
  387. + pre_established:1; /* State between sending 3rd ACK and
  388. + * receiving the fourth ack of new subflows.
  389. + */
  390. +
  391. + /* isn: needed to translate abs to relative subflow seqnums */
  392. + u32 snt_isn;
  393. + u32 rcv_isn;
  394. + u32 last_data_seq;
  395. + u8 path_index;
  396. + u8 loc_id;
  397. + u8 rem_id;
  398. +
  399. + u32 last_rbuf_opti; /* Timestamp of last rbuf optimization */
  400. + unsigned int sent_pkts;
  401. +
  402. + struct sk_buff *shortcut_ofoqueue; /* Shortcut to the current modified
  403. + * skb in the ofo-queue.
  404. + */
  405. +
  406. + int init_rcv_wnd;
  407. + u32 infinite_cutoff_seq;
  408. + struct delayed_work work;
  409. + u32 mptcp_loc_nonce;
  410. + struct tcp_sock *tp; /* Where is my daddy? */
  411. + u32 last_end_data_seq;
  412. +
  413. + /* MP_JOIN subflow: timer for retransmitting the 3rd ack */
  414. + struct timer_list mptcp_ack_timer;
  415. +
  416. + /* HMAC of the third ack */
  417. + char sender_mac[20];
  418. +};
  419. +
  420. +struct mptcp_tw {
  421. + struct list_head list;
  422. + u64 loc_key;
  423. + u64 rcv_nxt;
  424. + struct mptcp_cb __rcu *mpcb;
  425. + u8 meta_tw:1,
  426. + in_list:1;
  427. +};
  428. +
  429. +#define MPTCP_PM_NAME_MAX 16
  430. +struct mptcp_pm_ops {
  431. + struct list_head list;
  432. +
  433. + /* Signal the creation of a new MPTCP-session. */
  434. + void (*new_session)(struct sock *meta_sk, int index);
  435. + void (*release_sock)(struct sock *meta_sk);
  436. + void (*fully_established)(struct sock *meta_sk);
  437. + void (*new_remote_address)(struct sock *meta_sk);
  438. + int (*get_local_index)(sa_family_t family, union inet_addr *addr,
  439. + struct net *net);
  440. + int (*get_local_id)(sa_family_t family, union inet_addr *addr,
  441. + struct net *net);
  442. + void (*addr_signal)(struct sock *sk, unsigned *size,
  443. + struct tcp_out_options *opts, struct sk_buff *skb);
  444. +
  445. + char name[MPTCP_PM_NAME_MAX];
  446. + struct module *owner;
  447. +};
  448. +
  449. +struct mptcp_cb {
  450. + struct sock *meta_sk;
  451. +
  452. + /* list of sockets in this multipath connection */
  453. + struct tcp_sock *connection_list;
  454. + /* list of sockets that need a call to release_cb */
  455. + struct list_head callback_list;
  456. +
  457. + spinlock_t tw_lock;
  458. + struct list_head tw_list;
  459. + unsigned char mptw_state;
  460. +
  461. + atomic_t mpcb_refcnt;
  462. +
  463. + /* High-order bits of 64-bit sequence numbers */
  464. + u32 snd_high_order[2];
  465. + u32 rcv_high_order[2];
  466. +
  467. + u16 send_infinite_mapping:1,
  468. + in_time_wait:1,
  469. + list_rcvd:1, /* XXX TO REMOVE */
  470. + dss_csum:1,
  471. + server_side:1,
  472. + infinite_mapping_rcv:1,
  473. + infinite_mapping_snd:1,
  474. + dfin_combined:1, /* Was the DFIN combined with subflow-fin? */
  475. + passive_close:1,
  476. + snd_hiseq_index:1, /* Index in snd_high_order of snd_nxt */
  477. + rcv_hiseq_index:1; /* Index in rcv_high_order of rcv_nxt */
  478. +
  479. + /* socket count in this connection */
  480. + u8 cnt_subflows;
  481. + u8 cnt_established;
  482. +
  483. + u32 noneligible; /* Path mask of temporarily non
  484. + * eligible subflows by the scheduler
  485. + */
  486. +
  487. + struct sk_buff_head reinject_queue;
  488. +
  489. + u8 dfin_path_index;
  490. +
  491. +#define MPTCP_PM_SIZE 320
  492. + u8 mptcp_pm[MPTCP_PM_SIZE] __aligned(8);
  493. + struct mptcp_pm_ops *pm_ops;
  494. +
  495. + /* Mutex needed, because otherwise mptcp_close will complain that the
  496. + * socket is owned by the user.
  497. + * E.g., mptcp_sub_close_wq is taking the meta-lock.
  498. + */
  499. + struct mutex mpcb_mutex;
  500. +
  501. + /* Master socket, also part of the connection_list, this
  502. + * socket is the one that the application sees.
  503. + */
  504. + struct sock *master_sk;
  505. +
  506. + u64 csum_cutoff_seq;
  507. +
  508. + __u64 mptcp_loc_key;
  509. + __u32 mptcp_loc_token;
  510. + __u64 mptcp_rem_key;
  511. + __u32 mptcp_rem_token;
  512. +
  513. + /* Create a new subflow - necessary because the meta-sk may be IPv4, but
  514. + * the new subflow can be IPv6
  515. + */
  516. + struct sock *(*syn_recv_sock)(struct sock *sk, struct sk_buff *skb,
  517. + struct request_sock *req,
  518. + struct dst_entry *dst);
  519. +
  520. + /* Remote addresses */
  521. + struct mptcp_rem4 remaddr4[MPTCP_MAX_ADDR];
  522. + u8 rem4_bits;
  523. +
  524. + struct mptcp_rem6 remaddr6[MPTCP_MAX_ADDR];
  525. + u8 rem6_bits;
  526. +
  527. + u32 path_index_bits;
  528. + /* Next pi to pick up in case a new path becomes available */
  529. + u8 next_path_index;
  530. +
  531. + /* Original snd/rcvbuf of the initial subflow.
  532. + * Used for the new subflows on the server-side to allow correct
  533. + * autotuning
  534. + */
  535. + int orig_sk_rcvbuf;
  536. + int orig_sk_sndbuf;
  537. + u32 orig_window_clamp;
  538. +};
  539. +
  540. +#define MPTCP_SUB_CAPABLE 0
  541. +#define MPTCP_SUB_LEN_CAPABLE_SYN 12
  542. +#define MPTCP_SUB_LEN_CAPABLE_SYN_ALIGN 12
  543. +#define MPTCP_SUB_LEN_CAPABLE_ACK 20
  544. +#define MPTCP_SUB_LEN_CAPABLE_ACK_ALIGN 20
  545. +
  546. +#define MPTCP_SUB_JOIN 1
  547. +#define MPTCP_SUB_LEN_JOIN_SYN 12
  548. +#define MPTCP_SUB_LEN_JOIN_SYN_ALIGN 12
  549. +#define MPTCP_SUB_LEN_JOIN_SYNACK 16
  550. +#define MPTCP_SUB_LEN_JOIN_SYNACK_ALIGN 16
  551. +#define MPTCP_SUB_LEN_JOIN_ACK 24
  552. +#define MPTCP_SUB_LEN_JOIN_ACK_ALIGN 24
  553. +
  554. +#define MPTCP_SUB_DSS 2
  555. +#define MPTCP_SUB_LEN_DSS 4
  556. +#define MPTCP_SUB_LEN_DSS_ALIGN 4
  557. +
  558. +/* Lengths for seq and ack are the ones without the generic MPTCP-option header,
  559. + * as they are part of the DSS-option.
  560. + * To get the total length, just add the different options together.
  561. + */
  562. +#define MPTCP_SUB_LEN_SEQ 10
  563. +#define MPTCP_SUB_LEN_SEQ_CSUM 12
  564. +#define MPTCP_SUB_LEN_SEQ_ALIGN 12
  565. +
  566. +#define MPTCP_SUB_LEN_SEQ_64 14
  567. +#define MPTCP_SUB_LEN_SEQ_CSUM_64 16
  568. +#define MPTCP_SUB_LEN_SEQ_64_ALIGN 16
  569. +
  570. +#define MPTCP_SUB_LEN_ACK 4
  571. +#define MPTCP_SUB_LEN_ACK_ALIGN 4
  572. +
  573. +#define MPTCP_SUB_LEN_ACK_64 8
  574. +#define MPTCP_SUB_LEN_ACK_64_ALIGN 8
  575. +
  576. +/* This is the "default" option-length we will send out most often.
  577. + * MPTCP DSS-header
  578. + * 32-bit data sequence number
  579. + * 32-bit data ack
  580. + *
  581. + * It is needed to calculate the effective MSS we will be using when
  582. + * sending data.
  583. + */
  584. +#define MPTCP_SUB_LEN_DSM_ALIGN (MPTCP_SUB_LEN_DSS_ALIGN + \
  585. + MPTCP_SUB_LEN_SEQ_ALIGN + \
  586. + MPTCP_SUB_LEN_ACK_ALIGN)
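For illustration, plugging in the aligned lengths defined above (a worked example, values taken from the defines in this header):

    /* MPTCP_SUB_LEN_DSM_ALIGN = MPTCP_SUB_LEN_DSS_ALIGN (4)
     *                         + MPTCP_SUB_LEN_SEQ_ALIGN (12)
     *                         + MPTCP_SUB_LEN_ACK_ALIGN (4) = 20 bytes,
     * so a segment carrying the usual DSS option has roughly MSS - 20 bytes
     * left for payload.
     */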
  587. +
  588. +#define MPTCP_SUB_ADD_ADDR 3
  589. +#define MPTCP_SUB_LEN_ADD_ADDR4 8
  590. +#define MPTCP_SUB_LEN_ADD_ADDR6 20
  591. +#define MPTCP_SUB_LEN_ADD_ADDR4_ALIGN 8
  592. +#define MPTCP_SUB_LEN_ADD_ADDR6_ALIGN 20
  593. +
  594. +#define MPTCP_SUB_REMOVE_ADDR 4
  595. +#define MPTCP_SUB_LEN_REMOVE_ADDR 4
  596. +
  597. +#define MPTCP_SUB_PRIO 5
  598. +#define MPTCP_SUB_LEN_PRIO 3
  599. +#define MPTCP_SUB_LEN_PRIO_ADDR 4
  600. +#define MPTCP_SUB_LEN_PRIO_ALIGN 4
  601. +
  602. +#define MPTCP_SUB_FAIL 6
  603. +#define MPTCP_SUB_LEN_FAIL 12
  604. +#define MPTCP_SUB_LEN_FAIL_ALIGN 12
  605. +
  606. +#define MPTCP_SUB_FCLOSE 7
  607. +#define MPTCP_SUB_LEN_FCLOSE 12
  608. +#define MPTCP_SUB_LEN_FCLOSE_ALIGN 12
  609. +
  610. +
  611. +#define OPTION_MPTCP (1 << 5)
  612. +
  613. +static inline void reset_mpc(struct tcp_sock *tp)
  614. +{
  615. + tp->mpc = 0;
  616. +
  617. + tp->__select_window = __tcp_select_window;
  618. + tp->select_window = tcp_select_window;
  619. + tp->select_initial_window = tcp_select_initial_window;
  620. + tp->init_buffer_space = tcp_init_buffer_space;
  621. + tp->set_rto = tcp_set_rto;
  622. + tp->should_expand_sndbuf = tcp_should_expand_sndbuf;
  623. +}
  624. +
  625. +/* Initializes MPTCP flags in tcp_sock (and other tcp_sock members that depend
  626. + * on those flags).
  627. + */
  628. +static inline void mptcp_init_tcp_sock(struct tcp_sock *tp)
  629. +{
  630. + reset_mpc(tp);
  631. +}
  632. +
  633. +#ifdef CONFIG_MPTCP
  634. +
  635. +/* Used for checking if the mptcp initialization has been successful */
  636. +extern bool mptcp_init_failed;
  637. +
  638. +/* MPTCP options */
  639. +#define OPTION_TYPE_SYN (1 << 0)
  640. +#define OPTION_TYPE_SYNACK (1 << 1)
  641. +#define OPTION_TYPE_ACK (1 << 2)
  642. +#define OPTION_MP_CAPABLE (1 << 3)
  643. +#define OPTION_DATA_ACK (1 << 4)
  644. +#define OPTION_ADD_ADDR (1 << 5)
  645. +#define OPTION_MP_JOIN (1 << 6)
  646. +#define OPTION_MP_FAIL (1 << 7)
  647. +#define OPTION_MP_FCLOSE (1 << 8)
  648. +#define OPTION_REMOVE_ADDR (1 << 9)
  649. +#define OPTION_MP_PRIO (1 << 10)
  650. +
  651. +/* MPTCP flags */
  652. +#define MPTCPHDR_ACK 0x01
  653. +#define MPTCPHDR_SEQ 0x02
  654. +#define MPTCPHDR_FIN 0x04
  655. +#define MPTCPHDR_INF 0x08
  656. +#define MPTCPHDR_SEQ64_SET 0x10 /* Did we receive a 64-bit seq number? */
  657. +#define MPTCPHDR_SEQ64_OFO 0x20 /* Is it not in our circular array? */
  658. +#define MPTCPHDR_SEQ64_INDEX 0x40 /* Index of seq in mpcb->snd_high_order */
  659. +#define MPTCPHDR_DSS_CSUM 0x80
  660. +
  661. +/* Given the flag values above, it is impossible for all 8 bits of mptcp_flags
  662. + * to be set to 1. Thus, defining MPTCPHDR_JOIN as 0xFF is safe.
  663. + */
  664. +#define MPTCPHDR_JOIN 0xFF
  665. +
  666. +struct mptcp_option {
  667. + __u8 kind;
  668. + __u8 len;
  669. +#if defined(__LITTLE_ENDIAN_BITFIELD)
  670. + __u8 ver:4,
  671. + sub:4;
  672. +#elif defined(__BIG_ENDIAN_BITFIELD)
  673. + __u8 sub:4,
  674. + ver:4;
  675. +#else
  676. +#error "Adjust your <asm/byteorder.h> defines"
  677. +#endif
  678. +};
  679. +
  680. +struct mp_capable {
  681. + __u8 kind;
  682. + __u8 len;
  683. +#if defined(__LITTLE_ENDIAN_BITFIELD)
  684. + __u8 ver:4,
  685. + sub:4;
  686. + __u8 h:1,
  687. + rsv:5,
  688. + b:1,
  689. + a:1;
  690. +#elif defined(__BIG_ENDIAN_BITFIELD)
  691. + __u8 sub:4,
  692. + ver:4;
  693. + __u8 a:1,
  694. + b:1,
  695. + rsv:5,
  696. + h:1;
  697. +#else
  698. +#error "Adjust your <asm/byteorder.h> defines"
  699. +#endif
  700. + __u64 sender_key;
  701. + __u64 receiver_key;
  702. +} __attribute__((__packed__));
  703. +
  704. +struct mp_join {
  705. + __u8 kind;
  706. + __u8 len;
  707. +#if defined(__LITTLE_ENDIAN_BITFIELD)
  708. + __u8 b:1,
  709. + rsv:3,
  710. + sub:4;
  711. +#elif defined(__BIG_ENDIAN_BITFIELD)
  712. + __u8 sub:4,
  713. + rsv:3,
  714. + b:1;
  715. +#else
  716. +#error "Adjust your <asm/byteorder.h> defines"
  717. +#endif
  718. + __u8 addr_id;
  719. + union {
  720. + struct {
  721. + u32 token;
  722. + u32 nonce;
  723. + } syn;
  724. + struct {
  725. + __u64 mac;
  726. + u32 nonce;
  727. + } synack;
  728. + struct {
  729. + __u8 mac[20];
  730. + } ack;
  731. + } u;
  732. +} __attribute__((__packed__));
  733. +
  734. +struct mp_dss {
  735. + __u8 kind;
  736. + __u8 len;
  737. +#if defined(__LITTLE_ENDIAN_BITFIELD)
  738. + __u16 rsv1:4,
  739. + sub:4,
  740. + A:1,
  741. + a:1,
  742. + M:1,
  743. + m:1,
  744. + F:1,
  745. + rsv2:3;
  746. +#elif defined(__BIG_ENDIAN_BITFIELD)
  747. + __u16 sub:4,
  748. + rsv1:4,
  749. + rsv2:3,
  750. + F:1,
  751. + m:1,
  752. + M:1,
  753. + a:1,
  754. + A:1;
  755. +#else
  756. +#error "Adjust your <asm/byteorder.h> defines"
  757. +#endif
  758. +};
  759. +
  760. +struct mp_add_addr {
  761. + __u8 kind;
  762. + __u8 len;
  763. +#if defined(__LITTLE_ENDIAN_BITFIELD)
  764. + __u8 ipver:4,
  765. + sub:4;
  766. +#elif defined(__BIG_ENDIAN_BITFIELD)
  767. + __u8 sub:4,
  768. + ipver:4;
  769. +#else
  770. +#error "Adjust your <asm/byteorder.h> defines"
  771. +#endif
  772. + __u8 addr_id;
  773. + union {
  774. + struct {
  775. + struct in_addr addr;
  776. + __be16 port;
  777. + } v4;
  778. + struct {
  779. + struct in6_addr addr;
  780. + __be16 port;
  781. + } v6;
  782. + } u;
  783. +} __attribute__((__packed__));
  784. +
  785. +struct mp_remove_addr {
  786. + __u8 kind;
  787. + __u8 len;
  788. +#if defined(__LITTLE_ENDIAN_BITFIELD)
  789. + __u8 rsv:4,
  790. + sub:4;
  791. +#elif defined(__BIG_ENDIAN_BITFIELD)
  792. + __u8 sub:4,
  793. + rsv:4;
  794. +#else
  795. +#error "Adjust your <asm/byteorder.h> defines"
  796. +#endif
  797. + /* list of addr_id */
  798. + __u8 addrs_id;
  799. +};
  800. +
  801. +struct mp_fail {
  802. + __u8 kind;
  803. + __u8 len;
  804. +#if defined(__LITTLE_ENDIAN_BITFIELD)
  805. + __u16 rsv1:4,
  806. + sub:4,
  807. + rsv2:8;
  808. +#elif defined(__BIG_ENDIAN_BITFIELD)
  809. + __u16 sub:4,
  810. + rsv1:4,
  811. + rsv2:8;
  812. +#else
  813. +#error "Adjust your <asm/byteorder.h> defines"
  814. +#endif
  815. + __be64 data_seq;
  816. +} __attribute__((__packed__));
  817. +
  818. +struct mp_fclose {
  819. + __u8 kind;
  820. + __u8 len;
  821. +#if defined(__LITTLE_ENDIAN_BITFIELD)
  822. + __u16 rsv1:4,
  823. + sub:4,
  824. + rsv2:8;
  825. +#elif defined(__BIG_ENDIAN_BITFIELD)
  826. + __u16 sub:4,
  827. + rsv1:4,
  828. + rsv2:8;
  829. +#else
  830. +#error "Adjust your <asm/byteorder.h> defines"
  831. +#endif
  832. + __u64 key;
  833. +} __attribute__((__packed__));
  834. +
  835. +struct mp_prio {
  836. + __u8 kind;
  837. + __u8 len;
  838. +#if defined(__LITTLE_ENDIAN_BITFIELD)
  839. + __u8 b:1,
  840. + rsv:3,
  841. + sub:4;
  842. +#elif defined(__BIG_ENDIAN_BITFIELD)
  843. + __u8 sub:4,
  844. + rsv:3,
  845. + b:1;
  846. +#else
  847. +#error "Adjust your <asm/byteorder.h> defines"
  848. +#endif
  849. + __u8 addr_id;
  850. +} __attribute__((__packed__));
  851. +
  852. +static inline int mptcp_sub_len_dss(struct mp_dss *m, int csum)
  853. +{
  854. + return 4 + m->A * (4 + m->a * 4) + m->M * (10 + m->m * 4 + csum * 2);
  855. +}
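For illustration, two worked evaluations of the length formula above (the flag values are hypothetical):

    /* DSS with a 32-bit data ACK (A=1, a=0), a 32-bit mapping (M=1, m=0)
     * and no checksum: 4 + 1*(4 + 0*4) + 1*(10 + 0*4 + 0*2) = 18 bytes.
     * The same option with DSS checksums enabled (csum=1):
     * 4 + 4 + (10 + 0 + 2) = 20 bytes.
     */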
  856. +
  857. +#define MPTCP_APP 2
  858. +
  859. +extern int sysctl_mptcp_enabled;
  860. +extern int sysctl_mptcp_checksum;
  861. +extern int sysctl_mptcp_debug;
  862. +extern int sysctl_mptcp_syn_retries;
  863. +
  864. +extern struct workqueue_struct *mptcp_wq;
  865. +
  866. +#define mptcp_debug(fmt, args...) \
  867. + do { \
  868. + if (unlikely(sysctl_mptcp_debug)) \
  869. + pr_err(__FILE__ ": " fmt, ##args); \
  870. + } while (0)
  871. +
  872. +/* Iterates over all subflows */
  873. +#define mptcp_for_each_tp(mpcb, tp) \
  874. + for ((tp) = (mpcb)->connection_list; (tp); (tp) = (tp)->mptcp->next)
  875. +
  876. +#define mptcp_for_each_sk(mpcb, sk) \
  877. + for ((sk) = (struct sock *)(mpcb)->connection_list; \
  878. + sk; \
  879. + sk = (struct sock *)tcp_sk(sk)->mptcp->next)
  880. +
  881. +#define mptcp_for_each_sk_safe(__mpcb, __sk, __temp) \
  882. + for (__sk = (struct sock *)(__mpcb)->connection_list, \
  883. + __temp = __sk ? (struct sock *)tcp_sk(__sk)->mptcp->next : NULL; \
  884. + __sk; \
  885. + __sk = __temp, \
  886. + __temp = __sk ? (struct sock *)tcp_sk(__sk)->mptcp->next : NULL)
  887. +
  888. +/* Iterates over all bit set to 1 in a bitset */
  889. +#define mptcp_for_each_bit_set(b, i) \
  890. + for (i = ffs(b) - 1; i >= 0; i = ffs(b >> (i + 1) << (i + 1)) - 1)
  891. +
  892. +#define mptcp_for_each_bit_unset(b, i) \
  893. + mptcp_for_each_bit_set(~b, i)
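A minimal usage sketch of the bit iterator, assuming an mpcb in scope; do_something_with() is a hypothetical helper, not part of the patch:

    /* Visit every remote IPv4 address slot marked in rem4_bits.
     * With rem4_bits = 0x16 (binary 10110) the body runs for i = 1, 2 and 4,
     * i.e. once per set bit, lowest bit first.
     */
    int i;
    mptcp_for_each_bit_set(mpcb->rem4_bits, i)
        do_something_with(&mpcb->remaddr4[i]);   /* hypothetical helper */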
  894. +
  895. +extern struct lock_class_key meta_key;
  896. +extern struct lock_class_key meta_slock_key;
  897. +extern u32 mptcp_secret[MD5_MESSAGE_BYTES / 4];
  898. +
  899. +/* This is needed to ensure that two subsequent key generations result in
  900. + * different keys if the IPs and ports are the same.
  901. + */
  902. +extern u32 mptcp_key_seed;
  903. +
  904. +#define MPTCP_HASH_SIZE 1024
  905. +
  906. +extern struct hlist_nulls_head tk_hashtable[MPTCP_HASH_SIZE];
  907. +
  908. +/* This second hashtable is needed to retrieve request socks
  909. + * created as a result of a join request. While the SYN contains
  910. + * the token, the final ack does not, so we need a separate hashtable
  911. + * to retrieve the mpcb.
  912. + */
  913. +extern struct list_head mptcp_reqsk_htb[MPTCP_HASH_SIZE];
  914. +extern spinlock_t mptcp_reqsk_hlock; /* hashtable protection */
  915. +
  916. +/* Lock, protecting the two hash-tables that hold the token. Namely,
  917. + * mptcp_reqsk_tk_htb and tk_hashtable
  918. + */
  919. +extern spinlock_t mptcp_tk_hashlock; /* hashtable protection */
  920. +
  921. +void mptcp_data_ready(struct sock *sk, int bytes);
  922. +void mptcp_write_space(struct sock *sk);
  923. +
  924. +void mptcp_add_meta_ofo_queue(struct sock *meta_sk, struct sk_buff *skb,
  925. + struct sock *sk);
  926. +void mptcp_ofo_queue(struct sock *meta_sk);
  927. +void mptcp_purge_ofo_queue(struct tcp_sock *meta_tp);
  928. +void mptcp_cleanup_rbuf(struct sock *meta_sk, int copied);
  929. +int mptcp_alloc_mpcb(struct sock *master_sk, __u64 remote_key, u32 window);
  930. +int mptcp_add_sock(struct sock *meta_sk, struct sock *sk, u8 loc_id, u8 rem_id,
  931. + gfp_t flags);
  932. +void mptcp_del_sock(struct sock *sk);
  933. +void mptcp_update_metasocket(struct sock *sock, struct sock *meta_sk);
  934. +void mptcp_reinject_data(struct sock *orig_sk, int clone_it);
  935. +void mptcp_update_sndbuf(struct mptcp_cb *mpcb);
  936. +struct sk_buff *mptcp_next_segment(struct sock *sk, int *reinject);
  937. +void mptcp_send_fin(struct sock *meta_sk);
  938. +void mptcp_send_active_reset(struct sock *meta_sk, gfp_t priority);
  939. +int mptcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
  940. + int push_one, gfp_t gfp);
  941. +void mptcp_parse_options(const uint8_t *ptr, int opsize,
  942. + struct tcp_options_received *opt_rx,
  943. + struct mptcp_options_received *mopt,
  944. + const struct sk_buff *skb);
  945. +void mptcp_syn_options(struct sock *sk, struct tcp_out_options *opts,
  946. + unsigned *remaining);
  947. +void mptcp_synack_options(struct request_sock *req,
  948. + struct tcp_out_options *opts,
  949. + unsigned *remaining);
  950. +void mptcp_established_options(struct sock *sk, struct sk_buff *skb,
  951. + struct tcp_out_options *opts, unsigned *size);
  952. +void mptcp_options_write(__be32 *ptr, struct tcp_sock *tp,
  953. + struct tcp_out_options *opts,
  954. + struct sk_buff *skb);
  955. +void mptcp_close(struct sock *meta_sk, long timeout);
  956. +int mptcp_doit(struct sock *sk);
  957. +int mptcp_create_master_sk(struct sock *meta_sk, __u64 remote_key, u32 window);
  958. +int mptcp_check_req_master(struct sock *sk, struct sock *child,
  959. + struct request_sock *req,
  960. + struct request_sock **prev,
  961. + struct mptcp_options_received *mopt);
  962. +struct sock *mptcp_check_req_child(struct sock *sk, struct sock *child,
  963. + struct request_sock *req,
  964. + struct request_sock **prev,
  965. + struct mptcp_options_received *mopt);
  966. +u32 __mptcp_select_window(struct sock *sk);
  967. +void mptcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd,
  968. + __u32 *window_clamp, int wscale_ok,
  969. + __u8 *rcv_wscale, __u32 init_rcv_wnd,
  970. + const struct sock *sk);
  971. +unsigned int mptcp_current_mss(struct sock *meta_sk);
  972. +int mptcp_select_size(const struct sock *meta_sk, bool sg);
  973. +void mptcp_key_sha1(u64 key, u32 *token, u64 *idsn);
  974. +void mptcp_hmac_sha1(u8 *key_1, u8 *key_2, u8 *rand_1, u8 *rand_2,
  975. + u32 *hash_out);
  976. +void mptcp_clean_rtx_infinite(struct sk_buff *skb, struct sock *sk);
  977. +void mptcp_fin(struct sock *meta_sk);
  978. +void mptcp_retransmit_timer(struct sock *meta_sk);
  979. +int mptcp_write_wakeup(struct sock *meta_sk);
  980. +void mptcp_sub_close_wq(struct work_struct *work);
  981. +void mptcp_sub_close(struct sock *sk, unsigned long delay);
  982. +struct sock *mptcp_select_ack_sock(const struct sock *meta_sk, int copied);
  983. +void mptcp_fallback_meta_sk(struct sock *meta_sk);
  984. +int mptcp_backlog_rcv(struct sock *meta_sk, struct sk_buff *skb);
  985. +struct sock *mptcp_sk_clone(const struct sock *sk, int family, const gfp_t priority);
  986. +void mptcp_ack_handler(unsigned long);
  987. +int mptcp_check_rtt(const struct tcp_sock *tp, int time);
  988. +int mptcp_check_snd_buf(const struct tcp_sock *tp);
  989. +int mptcp_handle_options(struct sock *sk, const struct tcphdr *th, struct sk_buff *skb);
  990. +void __init mptcp_init(void);
  991. +int mptcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len);
  992. +int mptcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
  993. + unsigned int mss_now, int reinject);
  994. +int mptso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
  995. + unsigned int mss_now, gfp_t gfp, int reinject);
  996. +void mptcp_destroy_sock(struct sock *sk);
  997. +int mptcp_rcv_synsent_state_process(struct sock *sk, struct sock **skptr,
  998. + struct sk_buff *skb,
  999. + struct mptcp_options_received *mopt);
  1000. +unsigned int mptcp_xmit_size_goal(struct sock *meta_sk, u32 mss_now,
  1001. + int large_allowed);
  1002. +int mptcp_time_wait(struct sock *sk, struct tcp_timewait_sock *tw);
  1003. +void mptcp_twsk_destructor(struct tcp_timewait_sock *tw);
  1004. +void mptcp_update_tw_socks(const struct tcp_sock *tp, int state);
  1005. +void mptcp_disconnect(struct sock *sk);
  1006. +bool mptcp_should_expand_sndbuf(const struct sock *sk);
  1007. +int mptcp_retransmit_skb(struct sock *meta_sk, struct sk_buff *skb);
  1008. +void mptcp_tsq_flags(struct sock *sk);
  1009. +void mptcp_tsq_sub_deferred(struct sock *meta_sk);
  1010. +struct mp_join *mptcp_find_join(struct sk_buff *skb);
  1011. +void mptcp_hash_remove_bh(struct tcp_sock *meta_tp);
  1012. +void mptcp_hash_remove(struct tcp_sock *meta_tp);
  1013. +struct sock *mptcp_hash_find(struct net *net, u32 token);
  1014. +int mptcp_lookup_join(struct sk_buff *skb, struct inet_timewait_sock *tw);
  1015. +int mptcp_do_join_short(struct sk_buff *skb, struct mptcp_options_received *mopt,
  1016. + struct tcp_options_received *tmp_opt, struct net *net);
  1017. +void mptcp_reqsk_destructor(struct request_sock *req);
  1018. +void mptcp_reqsk_new_mptcp(struct request_sock *req,
  1019. + const struct tcp_options_received *rx_opt,
  1020. + const struct mptcp_options_received *mopt,
  1021. + const struct sk_buff *skb);
  1022. +int mptcp_check_req(struct sk_buff *skb, struct net *net);
  1023. +void mptcp_connect_init(struct sock *sk);
  1024. +void mptcp_sub_force_close(struct sock *sk);
  1025. +int mptcp_sub_len_remove_addr_align(u16 bitfield);
  1026. +void mptcp_remove_shortcuts(const struct mptcp_cb *mpcb,
  1027. + const struct sk_buff *skb);
  1028. +void mptcp_init_buffer_space(struct sock *sk);
  1029. +
  1030. +/* MPTCP-path-manager registration/initialization functions */
  1031. +int mptcp_register_path_manager(struct mptcp_pm_ops *pm);
  1032. +void mptcp_unregister_path_manager(struct mptcp_pm_ops *pm);
  1033. +void mptcp_init_path_manager(struct mptcp_cb *mpcb);
  1034. +void mptcp_cleanup_path_manager(struct mptcp_cb *mpcb);
  1035. +void mptcp_fallback_default(struct mptcp_cb *mpcb);
  1036. +void mptcp_get_default_path_manager(char *name);
  1037. +int mptcp_set_default_path_manager(const char *name);
  1038. +extern struct mptcp_pm_ops mptcp_pm_default;
  1039. +
  1040. +static inline
  1041. +struct mptcp_request_sock *mptcp_rsk(const struct request_sock *req)
  1042. +{
  1043. + return (struct mptcp_request_sock *)req;
  1044. +}
  1045. +
  1046. +static inline
  1047. +struct request_sock *rev_mptcp_rsk(const struct mptcp_request_sock *req)
  1048. +{
  1049. + return (struct request_sock *)req;
  1050. +}
  1051. +
  1052. +static inline bool mptcp_can_sendpage(struct sock *sk)
  1053. +{
  1054. + struct sock *sk_it;
  1055. +
  1056. + if (tcp_sk(sk)->mpcb->dss_csum)
  1057. + return false;
  1058. +
  1059. + mptcp_for_each_sk(tcp_sk(sk)->mpcb, sk_it) {
  1060. + if (!(sk_it->sk_route_caps & NETIF_F_SG) ||
  1061. + !(sk_it->sk_route_caps & NETIF_F_ALL_CSUM))
  1062. + return false;
  1063. + }
  1064. +
  1065. + return true;
  1066. +}
  1067. +
  1068. +static inline void mptcp_push_pending_frames(struct sock *meta_sk)
  1069. +{
  1070. + if (mptcp_next_segment(meta_sk, NULL)) {
  1071. + struct tcp_sock *tp = tcp_sk(meta_sk);
  1072. +
  1073. + /* We don't care about the MSS, because it will be set in
  1074. + * mptcp_write_xmit.
  1075. + */
  1076. + __tcp_push_pending_frames(meta_sk, 0, tp->nonagle);
  1077. + }
  1078. +}
  1079. +
  1080. +static inline void mptcp_send_reset(struct sock *sk)
  1081. +{
  1082. + tcp_send_active_reset(sk, GFP_ATOMIC);
  1083. + mptcp_sub_force_close(sk);
  1084. +}
  1085. +
  1086. +static inline int mptcp_is_data_seq(const struct sk_buff *skb)
  1087. +{
  1088. + return TCP_SKB_CB(skb)->mptcp_flags & MPTCPHDR_SEQ;
  1089. +}
  1090. +
  1091. +static inline int mptcp_is_data_fin(const struct sk_buff *skb)
  1092. +{
  1093. + return mptcp_is_data_seq(skb) &&
  1094. + (TCP_SKB_CB(skb)->mptcp_flags & MPTCPHDR_FIN);
  1095. +}
  1096. +
  1097. +/* Is it a data-fin while in infinite mapping mode?
  1098. + * In infinite mode, a subflow-fin is in fact a data-fin.
  1099. + */
  1100. +static inline int mptcp_is_data_fin2(const struct sk_buff *skb,
  1101. + const struct tcp_sock *tp)
  1102. +{
  1103. + return mptcp_is_data_fin(skb) ||
  1104. + (tp->mpcb->infinite_mapping_rcv && tcp_hdr(skb)->fin);
  1105. +}
  1106. +
  1107. +static inline void mptcp_skb_entail_init(const struct tcp_sock *tp,
  1108. + struct sk_buff *skb)
  1109. +{
  1110. + TCP_SKB_CB(skb)->mptcp_flags = MPTCPHDR_SEQ;
  1111. +}
  1112. +
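+/* Map the upper 32 bits of a 64-bit data sequence number onto one of the
+ * two cached receive-side high-order words; if it matches neither,
+ * MPTCPHDR_SEQ64_OFO is returned.
+ */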
  1113. +static inline u8 mptcp_get_64_bit(u64 data_seq, struct mptcp_cb *mpcb)
  1114. +{
  1115. + u64 data_seq_high = (u32)(data_seq >> 32);
  1116. +
  1117. + if (mpcb->rcv_high_order[0] == data_seq_high)
  1118. + return 0;
  1119. + else if (mpcb->rcv_high_order[1] == data_seq_high)
  1120. + return MPTCPHDR_SEQ64_INDEX;
  1121. + else
  1122. + return MPTCPHDR_SEQ64_OFO;
  1123. +}
  1124. +
1125. +/* Set the data_seq and return a pointer to the in-skb field holding it.
1126. + * If the packet carries a 64-bit dseq, the pointer points to the low-order 32 bits.
  1127. + */
  1128. +static inline __u32 *mptcp_skb_set_data_seq(const struct sk_buff *skb,
  1129. + u32 *data_seq,
  1130. + struct mptcp_cb *mpcb)
  1131. +{
  1132. + __u32 *ptr = (__u32 *)(skb_transport_header(skb) + TCP_SKB_CB(skb)->dss_off);
  1133. +
  1134. + if (TCP_SKB_CB(skb)->mptcp_flags & MPTCPHDR_SEQ64_SET) {
  1135. + u64 data_seq64 = get_unaligned_be64(ptr);
  1136. +
  1137. + if (mpcb)
  1138. + TCP_SKB_CB(skb)->mptcp_flags |= mptcp_get_64_bit(data_seq64, mpcb);
  1139. +
1140. + *data_seq = (u32)data_seq64;
  1141. + ptr++;
  1142. + } else {
  1143. + *data_seq = get_unaligned_be32(ptr);
  1144. + }
  1145. +
  1146. + return ptr;
  1147. +}
  1148. +
  1149. +static inline struct sock *mptcp_meta_sk(const struct sock *sk)
  1150. +{
  1151. + return tcp_sk(sk)->meta_sk;
  1152. +}
  1153. +
  1154. +static inline struct tcp_sock *mptcp_meta_tp(const struct tcp_sock *tp)
  1155. +{
  1156. + return tcp_sk(tp->meta_sk);
  1157. +}
  1158. +
  1159. +static inline int is_meta_tp(const struct tcp_sock *tp)
  1160. +{
  1161. + return tp->mpcb && mptcp_meta_tp(tp) == tp;
  1162. +}
  1163. +
  1164. +static inline int is_meta_sk(const struct sock *sk)
  1165. +{
  1166. + return sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP &&
  1167. + tcp_sk(sk)->mpc && mptcp_meta_sk(sk) == sk;
  1168. +}
  1169. +
  1170. +static inline int is_master_tp(const struct tcp_sock *tp)
  1171. +{
  1172. + return !tp->mpc || (!tp->mptcp->slave_sk && !is_meta_tp(tp));
  1173. +}
  1174. +
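+/* Unlink the request from the MPTCP request hash. The lock is taken with
+ * the _bh variant only when we are not already in softirq context.
+ */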
  1175. +static inline void mptcp_hash_request_remove(struct request_sock *req)
  1176. +{
  1177. + int in_softirq = 0;
  1178. +
  1179. + if (list_empty(&mptcp_rsk(req)->collide_tuple))
  1180. + return;
  1181. +
  1182. + if (in_softirq()) {
  1183. + spin_lock(&mptcp_reqsk_hlock);
  1184. + in_softirq = 1;
  1185. + } else {
  1186. + spin_lock_bh(&mptcp_reqsk_hlock);
  1187. + }
  1188. +
  1189. + list_del(&mptcp_rsk(req)->collide_tuple);
  1190. +
  1191. + if (in_softirq)
  1192. + spin_unlock(&mptcp_reqsk_hlock);
  1193. + else
  1194. + spin_unlock_bh(&mptcp_reqsk_hlock);
  1195. +}
  1196. +
  1197. +static inline void mptcp_init_mp_opt(struct mptcp_options_received *mopt)
  1198. +{
  1199. + mopt->saw_mpc = 0;
  1200. + mopt->dss_csum = 0;
  1201. + mopt->drop_me = 0;
  1202. +
  1203. + mopt->is_mp_join = 0;
  1204. + mopt->join_ack = 0;
  1205. +
  1206. + mopt->saw_low_prio = 0;
  1207. + mopt->low_prio = 0;
  1208. +
  1209. + mopt->saw_add_addr = 0;
  1210. + mopt->more_add_addr = 0;
  1211. +
  1212. + mopt->saw_rem_addr = 0;
  1213. + mopt->more_rem_addr = 0;
  1214. +
  1215. + mopt->mp_fail = 0;
  1216. + mopt->mp_fclose = 0;
  1217. +}
  1218. +
  1219. +static inline void mptcp_reset_mopt(struct tcp_sock *tp)
  1220. +{
  1221. + struct mptcp_options_received *mopt = &tp->mptcp->rx_opt;
  1222. +
  1223. + mopt->saw_low_prio = 0;
  1224. + mopt->saw_add_addr = 0;
  1225. + mopt->more_add_addr = 0;
  1226. + mopt->saw_rem_addr = 0;
  1227. + mopt->more_rem_addr = 0;
  1228. + mopt->join_ack = 0;
  1229. + mopt->mp_fail = 0;
  1230. + mopt->mp_fclose = 0;
  1231. +}
  1232. +
  1233. +static inline __be32 mptcp_get_highorder_sndbits(const struct sk_buff *skb,
  1234. + const struct mptcp_cb *mpcb)
  1235. +{
  1236. + return htonl(mpcb->snd_high_order[(TCP_SKB_CB(skb)->mptcp_flags &
  1237. + MPTCPHDR_SEQ64_INDEX) ? 1 : 0]);
  1238. +}
  1239. +
  1240. +static inline u64 mptcp_get_data_seq_64(const struct mptcp_cb *mpcb, int index,
  1241. + u32 data_seq_32)
  1242. +{
  1243. + return ((u64)mpcb->rcv_high_order[index] << 32) | data_seq_32;
  1244. +}
  1245. +
  1246. +static inline u64 mptcp_get_rcv_nxt_64(const struct tcp_sock *meta_tp)
  1247. +{
  1248. + struct mptcp_cb *mpcb = meta_tp->mpcb;
  1249. + return mptcp_get_data_seq_64(mpcb, mpcb->rcv_hiseq_index,
  1250. + meta_tp->rcv_nxt);
  1251. +}
  1252. +
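+/* On a 32-bit wrap of the meta send sequence number, switch to the other
+ * high-order slot and advance the newly selected slot by 2.
+ */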
  1253. +static inline void mptcp_check_sndseq_wrap(struct tcp_sock *meta_tp, int inc)
  1254. +{
  1255. + if (unlikely(meta_tp->snd_nxt > meta_tp->snd_nxt + inc)) {
  1256. + struct mptcp_cb *mpcb = meta_tp->mpcb;
  1257. + mpcb->snd_hiseq_index = mpcb->snd_hiseq_index ? 0 : 1;
  1258. + mpcb->snd_high_order[mpcb->snd_hiseq_index] += 2;
  1259. + }
  1260. +}
  1261. +
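+/* On a 32-bit wrap of the meta receive sequence number, advance the current
+ * high-order slot by 2 and then switch to the other slot.
+ */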
  1262. +static inline void mptcp_check_rcvseq_wrap(struct tcp_sock *meta_tp,
  1263. + u32 old_rcv_nxt)
  1264. +{
  1265. + if (unlikely(old_rcv_nxt > meta_tp->rcv_nxt)) {
  1266. + struct mptcp_cb *mpcb = meta_tp->mpcb;
  1267. + mpcb->rcv_high_order[mpcb->rcv_hiseq_index] += 2;
  1268. + mpcb->rcv_hiseq_index = mpcb->rcv_hiseq_index ? 0 : 1;
  1269. + }
  1270. +}
  1271. +
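+/* Subflow state predicates: whether a subflow may carry new data, receive
+ * data, or send a pure ACK.
+ */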
  1272. +static inline int mptcp_sk_can_send(const struct sock *sk)
  1273. +{
  1274. + return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
  1275. + !tcp_sk(sk)->mptcp->pre_established;
  1276. +}
  1277. +
  1278. +static inline int mptcp_sk_can_recv(const struct sock *sk)
  1279. +{
1280. + return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2);
  1281. +}
  1282. +
  1283. +static inline int mptcp_sk_can_send_ack(const struct sock *sk)
  1284. +{
  1285. + return !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV |
  1286. + TCPF_CLOSE | TCPF_LISTEN)) &&
  1287. + !tcp_sk(sk)->mptcp->pre_established;
  1288. +}
  1289. +
1290. +/* Only support GSO if all subflows support it */
  1291. +static inline bool mptcp_sk_can_gso(const struct sock *meta_sk)
  1292. +{
  1293. + struct sock *sk;
  1294. +
  1295. + if (tcp_sk(meta_sk)->mpcb->dss_csum)
1296. + return false;
  1297. +
  1298. + mptcp_for_each_sk(tcp_sk(meta_sk)->mpcb, sk) {
  1299. + if (!mptcp_sk_can_send(sk))
  1300. + continue;
  1301. + if (!sk_can_gso(sk))
  1302. + return false;
  1303. + }
  1304. + return true;
  1305. +}
  1306. +
  1307. +static inline bool mptcp_can_sg(const struct sock *meta_sk)
  1308. +{
  1309. + struct sock *sk;
  1310. +
  1311. + if (tcp_sk(meta_sk)->mpcb->dss_csum)
1312. + return false;
  1313. +
  1314. + mptcp_for_each_sk(tcp_sk(meta_sk)->mpcb, sk) {
  1315. + if (!mptcp_sk_can_send(sk))
  1316. + continue;
  1317. + if (!(sk->sk_route_caps & NETIF_F_SG))
  1318. + return false;
  1319. + }
  1320. + return true;
  1321. +}
  1322. +
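+/* Recompute the meta-level RTO as twice the largest RTO among the subflows
+ * that are currently able to send.
+ */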
  1323. +static inline void mptcp_set_rto(struct sock *sk)
  1324. +{
  1325. + struct tcp_sock *tp = tcp_sk(sk);
  1326. + struct sock *sk_it;
  1327. + struct inet_connection_sock *micsk = inet_csk(mptcp_meta_sk(sk));
  1328. + __u32 max_rto = 0;
  1329. +
1330. + /* We are in the recovery phase at the MPTCP level. Do not update the
  1331. + * RTO, because this would kill exponential backoff.
  1332. + */
  1333. + if (micsk->icsk_retransmits)
  1334. + return;
  1335. +
  1336. + mptcp_for_each_sk(tp->mpcb, sk_it) {
  1337. + if (mptcp_sk_can_send(sk_it) &&
  1338. + inet_csk(sk_it)->icsk_rto > max_rto)
  1339. + max_rto = inet_csk(sk_it)->icsk_rto;
  1340. + }
  1341. + if (max_rto) {
  1342. + micsk->icsk_rto = max_rto << 1;
  1343. +
1344. + /* A successful RTO measurement - reset the backoff counter */
  1345. + micsk->icsk_backoff = 0;
  1346. + }
  1347. +}
  1348. +
  1349. +static inline int mptcp_sysctl_syn_retries(void)
  1350. +{
  1351. + return sysctl_mptcp_syn_retries;
  1352. +}
  1353. +
  1354. +static inline void mptcp_sub_close_passive(struct sock *sk)
  1355. +{
  1356. + struct sock *meta_sk = mptcp_meta_sk(sk);
  1357. + struct tcp_sock *tp = tcp_sk(sk), *meta_tp = tcp_sk(meta_sk);
  1358. +
1359. + /* Only close if the app did a send-shutdown (passive close) and we
  1360. + * received the data-ack of the data-fin.
  1361. + */
  1362. + if (tp->mpcb->passive_close && meta_tp->snd_una == meta_tp->write_seq)
  1363. + mptcp_sub_close(sk, 0);
  1364. +}
  1365. +
  1366. +static inline bool mptcp_fallback_infinite(struct sock *sk, int flag)
  1367. +{
  1368. + struct tcp_sock *tp = tcp_sk(sk);
  1369. +
1370. + /* If data has been acknowledged on the meta-level, fully_established
  1371. + * will have been set before and thus we will not fall back to infinite
  1372. + * mapping.
  1373. + */
  1374. + if (likely(tp->mptcp->fully_established))
  1375. + return false;
  1376. +
  1377. + if (!(flag & MPTCP_FLAG_DATA_ACKED))
  1378. + return false;
  1379. +
1380. + /* Don't fall back twice ;) */
  1381. + if (tp->mpcb->infinite_mapping_snd)
  1382. + return false;
  1383. +
  1384. + pr_err("%s %#x will fallback - pi %d, src %pI4 dst %pI4 from %pS\n",
  1385. + __func__, tp->mpcb->mptcp_loc_token, tp->mptcp->path_index,
  1386. + &inet_sk(sk)->inet_saddr, &inet_sk(sk)->inet_daddr,
  1387. + __builtin_return_address(0));
  1388. + if (!is_master_tp(tp))
  1389. + return true;
  1390. +
  1391. + tp->mpcb->infinite_mapping_snd = 1;
  1392. + tp->mpcb->infinite_mapping_rcv = 1;
  1393. + tp->mptcp->fully_established = 1;
  1394. +
  1395. + return false;
  1396. +}
  1397. +
1398. +/* Find the first free index in the bitfield, starting at 'base' and skipping index 'j'; returns -1 if none is free */
  1399. +static inline int __mptcp_find_free_index(u8 bitfield, int j, u8 base)
  1400. +{
  1401. + int i;
  1402. + mptcp_for_each_bit_unset(bitfield >> base, i) {
1403. + /* We wrapped past the end of the bitfield - retry from 0 */
  1404. + if (i + base >= sizeof(bitfield) * 8) {
  1405. + mptcp_for_each_bit_unset(bitfield, i) {
  1406. + if (i >= sizeof(bitfield) * 8)
  1407. + goto exit;
  1408. +
  1409. + if (i != j)
  1410. + return i;
  1411. + }
  1412. + goto exit;
  1413. + }
  1414. + if (i + base >= sizeof(bitfield) * 8)
  1415. + break;
  1416. +
  1417. + if (i + base != j)
  1418. + return i + base;
  1419. + }
  1420. +exit:
  1421. + return -1;
  1422. +}
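+/* Example: __mptcp_find_free_index(0x0b, -1, 0) returns 2, since only
+ * bits 0, 1 and 3 of the bitfield are in use.
+ */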
  1423. +
  1424. +static inline int mptcp_find_free_index(u8 bitfield)
  1425. +{
  1426. + return __mptcp_find_free_index(bitfield, -1, 0);
  1427. +}
  1428. +
1429. +/* Find the first free path index (bit == 0 in the bitfield), mark it as used and return it */
  1430. +static inline u8 mptcp_set_new_pathindex(struct mptcp_cb *mpcb)
  1431. +{
  1432. + u8 base = mpcb->next_path_index;
  1433. + int i;
  1434. +
  1435. + /* Start at 1, because 0 is reserved for the meta-sk */
  1436. + mptcp_for_each_bit_unset(mpcb->path_index_bits >> base, i) {
  1437. + if (i + base < 1)
  1438. + continue;
  1439. + if (i + base >= sizeof(mpcb->path_index_bits) * 8)
  1440. + break;
  1441. + i += base;
  1442. + mpcb->path_index_bits |= (1 << i);
  1443. + mpcb->next_path_index = i + 1;
  1444. + return i;
  1445. + }
  1446. + mptcp_for_each_bit_unset(mpcb->path_index_bits, i) {
  1447. + if (i >= sizeof(mpcb->path_index_bits) * 8)
  1448. + break;
  1449. + if (i < 1)
  1450. + continue;
  1451. + mpcb->path_index_bits |= (1 << i);
  1452. + mpcb->next_path_index = i + 1;
  1453. + return i;
  1454. + }
  1455. +
  1456. + return 0;
  1457. +}
  1458. +
  1459. +static inline int mptcp_v6_is_v4_mapped(struct sock *sk)
  1460. +{
  1461. + return sk->sk_family == AF_INET6 &&
  1462. + ipv6_addr_type(&inet6_sk(sk)->saddr) == IPV6_ADDR_MAPPED;
  1463. +}
  1464. +
1465. +/* TCP and MPTCP mpc flag-dependent functions */
  1466. +u16 mptcp_select_window(struct sock *sk);
  1467. +void mptcp_init_buffer_space(struct sock *sk);
  1468. +void mptcp_tcp_set_rto(struct sock *sk);
  1469. +
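+/* Mark the socket as MPTCP-capable and redirect the window, buffer-space and
+ * RTO operations to their MPTCP-aware implementations.
+ */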
  1470. +static inline void set_mpc(struct tcp_sock *tp)
  1471. +{
  1472. + tp->mpc = 1;
  1473. +
  1474. + tp->__select_window = __mptcp_select_window;
  1475. + tp->select_window = mptcp_select_window;
  1476. + tp->select_initial_window = mptcp_select_initial_window;
  1477. + tp->init_buffer_space = mptcp_init_buffer_space;
  1478. + tp->set_rto = mptcp_tcp_set_rto;
  1479. + tp->should_expand_sndbuf = mptcp_should_expand_sndbuf;
  1480. +}
  1481. +
  1482. +#else /* CONFIG_MPTCP */
  1483. +#define mptcp_debug(fmt, args...) \
  1484. + do { \
  1485. + } while (0)
  1486. +
  1487. +/* Without MPTCP, we just do one iteration
1488. + * over the only socket available. This assumes that
1489. + * the sk/tp arg is that socket.
  1490. + */
  1491. +#define mptcp_for_each_sk(mpcb, sk)
  1492. +#define mptcp_for_each_sk_safe(__mpcb, __sk, __temp)
  1493. +
  1494. +static inline int mptcp_is_data_fin(const struct sk_buff *skb)
  1495. +{
  1496. + return 0;
  1497. +}
  1498. +static inline int mptcp_is_data_seq(const struct sk_buff *skb)
  1499. +{
  1500. + return 0;
  1501. +}
  1502. +static inline struct sock *mptcp_meta_sk(const struct sock *sk)
  1503. +{
  1504. + return NULL;
  1505. +}
  1506. +static inline struct tcp_sock *mptcp_meta_tp(const struct tcp_sock *tp)
  1507. +{
  1508. + return NULL;
  1509. +}
  1510. +static inline int is_meta_sk(const struct sock *sk)
  1511. +{
  1512. + return 0;
  1513. +}
  1514. +static inline int is_master_tp(const struct tcp_sock *tp)
  1515. +{
  1516. + return 0;
  1517. +}
  1518. +static inline void mptcp_purge_ofo_queue(struct tcp_sock *meta_tp) {}
  1519. +static inline void mptcp_cleanup_rbuf(const struct sock *meta_sk, int copied) {}
  1520. +static inline void mptcp_del_sock(const struct sock *sk) {}
  1521. +static inline void mptcp_reinject_data(struct sock *orig_sk, int clone_it) {}
  1522. +static inline void mptcp_update_sndbuf(const struct mptcp_cb *mpcb) {}
  1523. +static inline void mptcp_skb_entail_init(const struct tcp_sock *tp,
  1524. + const struct sk_buff *skb) {}
  1525. +static inline void mptcp_clean_rtx_infinite(const struct sk_buff *skb,
  1526. + const struct sock *sk) {}
  1527. +static inline void mptcp_retransmit_timer(const struct sock *meta_sk) {}
  1528. +static inline int mptcp_write_wakeup(struct sock *meta_sk)
  1529. +{
  1530. + return 0;
  1531. +}
  1532. +static inline void mptcp_sub_close(struct sock *sk, unsigned long delay) {}
  1533. +static inline void mptcp_set_rto(const struct sock *sk) {}
  1534. +static inline void mptcp_send_fin(const struct sock *meta_sk) {}
  1535. +static inline void mptcp_parse_options(const uint8_t *ptr, const int opsize,
  1536. + const struct tcp_options_received *opt_rx,
  1537. + const struct mptcp_options_received *mopt,
  1538. + const struct sk_buff *skb) {}
  1539. +static inline void mptcp_syn_options(struct sock *sk,
  1540. + struct tcp_out_options *opts,
  1541. + unsigned *remaining) {}
  1542. +static inline void mptcp_synack_options(struct request_sock *req,
  1543. + struct tcp_out_options *opts,
  1544. + unsigned *remaining) {}
  1545. +
  1546. +static inline void mptcp_established_options(struct sock *sk,
  1547. + struct sk_buff *skb,
  1548. + struct tcp_out_options *opts,
  1549. + unsigned *size) {}
  1550. +static inline void mptcp_options_write(__be32 *ptr, struct tcp_sock *tp,
  1551. + struct tcp_out_options *opts,
  1552. + struct sk_buff *skb) {}
  1553. +static inline void mptcp_close(struct sock *meta_sk, long timeout) {}
  1554. +static inline int mptcp_doit(struct sock *sk)
  1555. +{
  1556. + return 0;
  1557. +}
  1558. +static inline int mptcp_check_req_master(const struct sock *sk,
  1559. + const struct sock *child,
  1560. + struct request_sock *req,
  1561. + struct request_sock **prev,
  1562. + const struct mptcp_options_received *mopt)
  1563. +{
  1564. + return 1;
  1565. +}
  1566. +static inline struct sock *mptcp_check_req_child(struct sock *sk,
  1567. + struct sock *child,
  1568. + struct request_sock *req,
  1569. + struct request_sock **prev,
  1570. + struct mptcp_options_received *mopt)
  1571. +{
  1572. + return NULL;
  1573. +}
  1574. +static inline unsigned int mptcp_current_mss(struct sock *meta_sk)
  1575. +{
  1576. + return 0;
  1577. +}
  1578. +static inline int mptcp_select_size(const struct sock *meta_sk, bool sg)
  1579. +{
  1580. + return 0;
  1581. +}
  1582. +static inline void mptcp_sub_close_passive(struct sock *sk) {}
  1583. +static inline bool mptcp_fallback_infinite(const struct sock *sk, int flag)
  1584. +{
  1585. + return false;
  1586. +}
  1587. +static inline void mptcp_init_mp_opt(const struct mptcp_options_received *mopt) {}
  1588. +static inline int mptcp_check_rtt(const struct tcp_sock *tp, int time)
  1589. +{
  1590. + return 0;
  1591. +}
  1592. +static inline int mptcp_check_snd_buf(const struct tcp_sock *tp)
  1593. +{
  1594. + return 0;
  1595. +}
  1596. +static inline int mptcp_sysctl_syn_retries(void)
  1597. +{
  1598. + return 0;
  1599. +}
  1600. +static inline void mptcp_send_reset(const struct sock *sk) {}
  1601. +static inline void mptcp_send_active_reset(struct sock *meta_sk,
  1602. + gfp_t priority) {}
  1603. +static inline int mptcp_write_xmit(struct sock *sk, unsigned int mss_now,
  1604. + int nonagle, int push_one, gfp_t gfp)
  1605. +{
  1606. + return 0;
  1607. +}
  1608. +static inline struct sock *mptcp_sk_clone(const struct sock *sk, int family,
  1609. + const gfp_t priority)
  1610. +{
  1611. + return NULL;
  1612. +}
  1613. +static inline int mptcp_handle_options(struct sock *sk,
  1614. + const struct tcphdr *th,
  1615. + struct sk_buff *skb)
  1616. +{
  1617. + return 0;
  1618. +}
  1619. +static inline void mptcp_reset_mopt(struct tcp_sock *tp) {}
  1620. +static inline void __init mptcp_init(void) {}
  1621. +static inline int mptcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
  1622. +{
  1623. + return 0;
  1624. +}
  1625. +static inline int mptcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
  1626. + unsigned int mss_now, int reinject)
  1627. +{
  1628. + return 0;
  1629. +}
  1630. +static inline int mptso_fragment(struct sock *sk, struct sk_buff *skb,
  1631. + unsigned int len, unsigned int mss_now,
  1632. + gfp_t gfp, int reinject)
  1633. +{
  1634. + return 0;
  1635. +}
  1636. +static inline bool mptcp_sk_can_gso(const struct sock *sk)
  1637. +{
  1638. + return false;
  1639. +}
  1640. +static inline bool mptcp_can_sg(const struct sock *meta_sk)
  1641. +{
  1642. + return false;
  1643. +}
  1644. +static inline unsigned int mptcp_xmit_size_goal(struct sock *meta_sk,
  1645. + u32 mss_now, int large_allowed)
  1646. +{
  1647. + return 0;
  1648. +}
  1649. +static inline void mptcp_destroy_sock(struct sock *sk) {}
  1650. +static inline int mptcp_rcv_synsent_state_process(struct sock *sk,
  1651. + struct sock **skptr,
  1652. + struct sk_buff *skb,
  1653. + struct mptcp_options_received *mopt)
  1654. +{
  1655. + return 0;
  1656. +}
  1657. +static inline bool mptcp_can_sendpage(struct sock *sk)
  1658. +{
  1659. + return false;
  1660. +}
  1661. +static inline int mptcp_time_wait(struct sock *sk, struct tcp_timewait_sock *tw)
  1662. +{
  1663. + return 0;
  1664. +}
  1665. +static inline void mptcp_twsk_destructor(struct tcp_timewait_sock *tw) {}
  1666. +static inline void mptcp_update_tw_socks(const struct tcp_sock *tp, int state) {}
  1667. +static inline void mptcp_disconnect(struct sock *sk) {}
  1668. +static inline void mptcp_tsq_flags(struct sock *sk) {}
  1669. +static inline void mptcp_tsq_sub_deferred(struct sock *meta_sk) {}
  1670. +static inline void mptcp_hash_remove_bh(struct tcp_sock *meta_tp) {}
  1671. +static inline void mptcp_hash_remove(struct tcp_sock *meta_tp) {}
  1672. +static inline void mptcp_reqsk_new_mptcp(struct request_sock *req,
  1673. + const struct tcp_options_received *rx_opt,
  1674. + const struct mptcp_options_received *mopt,
  1675. + const struct sk_buff *skb) {}
  1676. +static inline void mptcp_remove_shortcuts(const struct mptcp_cb *mpcb,
  1677. + const struct sk_buff *skb) {}
  1678. +#endif /* CONFIG_MPTCP */
  1679. +
  1680. +#endif /* _MPTCP_H */
  1681. diff -Nur linux-3.14.45.orig/include/net/mptcp_v4.h linux-3.14.45/include/net/mptcp_v4.h
  1682. --- linux-3.14.45.orig/include/net/mptcp_v4.h 1970-01-01 01:00:00.000000000 +0100
  1683. +++ linux-3.14.45/include/net/mptcp_v4.h 2015-06-24 14:15:48.871862463 +0200
  1684. @@ -0,0 +1,69 @@
  1685. +/*
  1686. + * MPTCP implementation
  1687. + *
  1688. + * Initial Design & Implementation:
  1689. + * Sébastien Barré <sebastien.barre@uclouvain.be>
  1690. + *
  1691. + * Current Maintainer & Author:
  1692. + * Christoph Paasch <christoph.paasch@uclouvain.be>
  1693. + *
  1694. + * Additional authors:
  1695. + * Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
  1696. + * Gregory Detal <gregory.detal@uclouvain.be>
  1697. + * Fabien Duchêne <fabien.duchene@uclouvain.be>
  1698. + * Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
  1699. + * Lavkesh Lahngir <lavkesh51@gmail.com>
  1700. + * Andreas Ripke <ripke@neclab.eu>
  1701. + * Vlad Dogaru <vlad.dogaru@intel.com>
  1702. + * Octavian Purdila <octavian.purdila@intel.com>
  1703. + * John Ronan <jronan@tssg.org>
  1704. + * Catalin Nicutar <catalin.nicutar@gmail.com>
  1705. + * Brandon Heller <brandonh@stanford.edu>
  1706. + *
  1707. + *
  1708. + * This program is free software; you can redistribute it and/or
  1709. + * modify it under the terms of the GNU General Public License
  1710. + * as published by the Free Software Foundation; either version
  1711. + * 2 of the License, or (at your option) any later version.
  1712. + */
  1713. +
  1714. +#ifndef MPTCP_V4_H_
  1715. +#define MPTCP_V4_H_
  1716. +
  1717. +
  1718. +#include <linux/in.h>
  1719. +#include <linux/skbuff.h>
  1720. +#include <net/mptcp.h>
  1721. +#include <net/request_sock.h>
  1722. +#include <net/sock.h>
  1723. +
  1724. +extern struct request_sock_ops mptcp_request_sock_ops;
  1725. +
  1726. +#ifdef CONFIG_MPTCP
  1727. +
  1728. +int mptcp_v4_do_rcv(struct sock *meta_sk, struct sk_buff *skb);
  1729. +int mptcp_v4_rem_raddress(struct mptcp_cb *mpcb, u8 id);
  1730. +int mptcp_v4_add_raddress(struct mptcp_cb *mpcb, const struct in_addr *addr,
  1731. + __be16 port, u8 id);
  1732. +void mptcp_v4_set_init_addr_bit(struct mptcp_cb *mpcb, __be32 daddr, int index);
  1733. +struct sock *mptcp_v4_search_req(const __be16 rport, const __be32 raddr,
  1734. + const __be32 laddr, const struct net *net);
  1735. +int mptcp_init4_subsockets(struct sock *meta_sk, const struct mptcp_loc4 *loc,
  1736. + struct mptcp_rem4 *rem);
  1737. +int mptcp_pm_v4_init(void);
  1738. +void mptcp_pm_v4_undo(void);
  1739. +u32 mptcp_v4_get_nonce(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
  1740. + u32 seq);
  1741. +u64 mptcp_v4_get_key(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport);
  1742. +
  1743. +#else
  1744. +
  1745. +static inline int mptcp_v4_do_rcv(const struct sock *meta_sk,
  1746. + const struct sk_buff *skb)
  1747. +{
  1748. + return 0;
  1749. +}
  1750. +
  1751. +#endif /* CONFIG_MPTCP */
  1752. +
  1753. +#endif /* MPTCP_V4_H_ */
  1754. diff -Nur linux-3.14.45.orig/include/net/mptcp_v6.h linux-3.14.45/include/net/mptcp_v6.h
  1755. --- linux-3.14.45.orig/include/net/mptcp_v6.h 1970-01-01 01:00:00.000000000 +0100
  1756. +++ linux-3.14.45/include/net/mptcp_v6.h 2015-06-24 14:15:48.871862463 +0200
  1757. @@ -0,0 +1,72 @@
  1758. +/*
  1759. + * MPTCP implementation
  1760. + *
  1761. + * Initial Design & Implementation:
  1762. + * Sébastien Barré <sebastien.barre@uclouvain.be>
  1763. + *
  1764. + * Current Maintainer & Author:
  1765. + * Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
  1766. + *
  1767. + * Additional authors:
  1768. + * Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
  1769. + * Gregory Detal <gregory.detal@uclouvain.be>
  1770. + * Fabien Duchêne <fabien.duchene@uclouvain.be>
  1771. + * Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
  1772. + * Lavkesh Lahngir <lavkesh51@gmail.com>
  1773. + * Andreas Ripke <ripke@neclab.eu>
  1774. + * Vlad Dogaru <vlad.dogaru@intel.com>
  1775. + * Octavian Purdila <octavian.purdila@intel.com>
  1776. + * John Ronan <jronan@tssg.org>
  1777. + * Catalin Nicutar <catalin.nicutar@gmail.com>
  1778. + * Brandon Heller <brandonh@stanford.edu>
  1779. + *
  1780. + *
  1781. + * This program is free software; you can redistribute it and/or
  1782. + * modify it under the terms of the GNU General Public License
  1783. + * as published by the Free Software Foundation; either version
  1784. + * 2 of the License, or (at your option) any later version.
  1785. + */
  1786. +
  1787. +#ifndef _MPTCP_V6_H
  1788. +#define _MPTCP_V6_H
  1789. +
  1790. +#include <linux/in6.h>
  1791. +#include <net/if_inet6.h>
  1792. +
  1793. +#include <net/mptcp.h>
  1794. +
  1795. +extern struct request_sock_ops mptcp6_request_sock_ops;
  1796. +extern struct proto mptcpv6_prot;
  1797. +
  1798. +#ifdef CONFIG_MPTCP
  1799. +
  1800. +int mptcp_v6_do_rcv(struct sock *meta_sk, struct sk_buff *skb);
  1801. +int mptcp_v6_rem_raddress(struct mptcp_cb *mpcb, u8 id);
  1802. +int mptcp_v6_add_raddress(struct mptcp_cb *mpcb, const struct in6_addr *addr,
  1803. + __be16 port, u8 id);
  1804. +void mptcp_v6_set_init_addr_bit(struct mptcp_cb *mpcb,
  1805. + const struct in6_addr *daddr, int index);
  1806. +struct sock *mptcp_v6_search_req(const __be16 rport, const struct in6_addr *raddr,
  1807. + const struct in6_addr *laddr, const struct net *net);
  1808. +int mptcp_init6_subsockets(struct sock *meta_sk, const struct mptcp_loc6 *loc,
  1809. + struct mptcp_rem6 *rem);
  1810. +int mptcp_pm_v6_init(void);
  1811. +void mptcp_pm_v6_undo(void);
  1812. +struct sock *mptcp_v6v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
  1813. + struct request_sock *req,
  1814. + struct dst_entry *dst);
  1815. +__u32 mptcp_v6_get_nonce(const __be32 *saddr, const __be32 *daddr,
  1816. + __be16 sport, __be16 dport, u32 seq);
  1817. +u64 mptcp_v6_get_key(const __be32 *saddr, const __be32 *daddr,
  1818. + __be16 sport, __be16 dport);
  1819. +
  1820. +#else /* CONFIG_MPTCP */
  1821. +
  1822. +static inline int mptcp_v6_do_rcv(struct sock *meta_sk, struct sk_buff *skb)
  1823. +{
  1824. + return 0;
  1825. +}
  1826. +
  1827. +#endif /* CONFIG_MPTCP */
  1828. +
  1829. +#endif /* _MPTCP_V6_H */
  1830. diff -Nur linux-3.14.45.orig/include/net/net_namespace.h linux-3.14.45/include/net/net_namespace.h
  1831. --- linux-3.14.45.orig/include/net/net_namespace.h 2015-06-23 02:01:36.000000000 +0200
  1832. +++ linux-3.14.45/include/net/net_namespace.h 2015-06-24 14:15:48.871862463 +0200
  1833. @@ -15,6 +15,7 @@
  1834. #include <net/netns/packet.h>
  1835. #include <net/netns/ipv4.h>
  1836. #include <net/netns/ipv6.h>
  1837. +#include <net/netns/mptcp.h>
  1838. #include <net/netns/sctp.h>
  1839. #include <net/netns/dccp.h>
  1840. #include <net/netns/netfilter.h>
  1841. @@ -90,6 +91,9 @@
  1842. #if IS_ENABLED(CONFIG_IPV6)
  1843. struct netns_ipv6 ipv6;
  1844. #endif
  1845. +#if IS_ENABLED(CONFIG_MPTCP)
  1846. + struct netns_mptcp mptcp;
  1847. +#endif
  1848. #if defined(CONFIG_IP_SCTP) || defined(CONFIG_IP_SCTP_MODULE)
  1849. struct netns_sctp sctp;
  1850. #endif
  1851. diff -Nur linux-3.14.45.orig/include/net/netns/mptcp.h linux-3.14.45/include/net/netns/mptcp.h
  1852. --- linux-3.14.45.orig/include/net/netns/mptcp.h 1970-01-01 01:00:00.000000000 +0100
  1853. +++ linux-3.14.45/include/net/netns/mptcp.h 2015-06-24 14:15:48.871862463 +0200
  1854. @@ -0,0 +1,44 @@
  1855. +/*
  1856. + * MPTCP implementation - MPTCP namespace
  1857. + *
  1858. + * Initial Design & Implementation:
  1859. + * Sébastien Barré <sebastien.barre@uclouvain.be>
  1860. + *
  1861. + * Current Maintainer:
  1862. + * Christoph Paasch <christoph.paasch@uclouvain.be>
  1863. + *
  1864. + * Additional authors:
  1865. + * Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
  1866. + * Gregory Detal <gregory.detal@uclouvain.be>
  1867. + * Fabien Duchêne <fabien.duchene@uclouvain.be>
  1868. + * Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
  1869. + * Lavkesh Lahngir <lavkesh51@gmail.com>
  1870. + * Andreas Ripke <ripke@neclab.eu>
  1871. + * Vlad Dogaru <vlad.dogaru@intel.com>
  1872. + * Octavian Purdila <octavian.purdila@intel.com>
  1873. + * John Ronan <jronan@tssg.org>
  1874. + * Catalin Nicutar <catalin.nicutar@gmail.com>
  1875. + * Brandon Heller <brandonh@stanford.edu>
  1876. + *
  1877. + *
  1878. + * This program is free software; you can redistribute it and/or
  1879. + * modify it under the terms of the GNU General Public License
  1880. + * as published by the Free Software Foundation; either version
  1881. + * 2 of the License, or (at your option) any later version.
  1882. + */
  1883. +
  1884. +#ifndef __NETNS_MPTCP_H__
  1885. +#define __NETNS_MPTCP_H__
  1886. +
  1887. +#include <linux/compiler.h>
  1888. +
  1889. +enum {
  1890. + MPTCP_PM_FULLMESH = 0,
  1891. + MPTCP_PM_MAX
  1892. +};
  1893. +
  1894. +struct netns_mptcp {
  1895. + void *path_managers[MPTCP_PM_MAX];
  1896. +};
  1897. +
  1898. +#endif /* __NETNS_MPTCP_H__ */
  1899. diff -Nur linux-3.14.45.orig/include/net/request_sock.h linux-3.14.45/include/net/request_sock.h
  1900. --- linux-3.14.45.orig/include/net/request_sock.h 2015-06-23 02:01:36.000000000 +0200
  1901. +++ linux-3.14.45/include/net/request_sock.h 2015-06-24 14:15:48.871862463 +0200
  1902. @@ -164,7 +164,7 @@
  1903. };
  1904. int reqsk_queue_alloc(struct request_sock_queue *queue,
  1905. - unsigned int nr_table_entries);
  1906. + unsigned int nr_table_entries, gfp_t flags);
  1907. void __reqsk_queue_destroy(struct request_sock_queue *queue);
  1908. void reqsk_queue_destroy(struct request_sock_queue *queue);
  1909. diff -Nur linux-3.14.45.orig/include/net/sock.h linux-3.14.45/include/net/sock.h
  1910. --- linux-3.14.45.orig/include/net/sock.h 2015-06-23 02:01:36.000000000 +0200
  1911. +++ linux-3.14.45/include/net/sock.h 2015-06-24 14:15:48.871862463 +0200
  1912. @@ -899,6 +899,16 @@
  1913. int sk_wait_data(struct sock *sk, long *timeo);
  1914. +/* START - needed for MPTCP */
  1915. +extern void sock_def_error_report(struct sock *sk);
  1916. +extern struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
  1917. + int family);
  1918. +extern void sock_lock_init(struct sock *sk);
  1919. +
  1920. +extern struct lock_class_key af_callback_keys[AF_MAX];
  1921. +extern char *const af_family_clock_key_strings[AF_MAX+1];
  1922. +/* END - needed for MPTCP */
  1923. +
  1924. struct request_sock_ops;
  1925. struct timewait_sock_ops;
  1926. struct inet_hashinfo;
  1927. diff -Nur linux-3.14.45.orig/include/net/tcp.h linux-3.14.45/include/net/tcp.h
  1928. --- linux-3.14.45.orig/include/net/tcp.h 2015-06-23 02:01:36.000000000 +0200
  1929. +++ linux-3.14.45/include/net/tcp.h 2015-06-24 14:15:48.875862469 +0200
  1930. @@ -176,6 +176,7 @@
  1931. #define TCPOPT_SACK 5 /* SACK Block */
  1932. #define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */
  1933. #define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */
  1934. +#define TCPOPT_MPTCP 30
  1935. #define TCPOPT_EXP 254 /* Experimental */
  1936. /* Magic number to be after the option value for sharing TCP
  1937. * experimental options. See draft-ietf-tcpm-experimental-options-00.txt
  1938. @@ -234,6 +235,27 @@
  1939. */
  1940. #define TFO_SERVER_ALWAYS 0x1000
  1941. +/* Flags from tcp_input.c for tcp_ack */
  1942. +#define FLAG_DATA 0x01 /* Incoming frame contained data. */
  1943. +#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
  1944. +#define FLAG_DATA_ACKED 0x04 /* This ACK acknowledged new data. */
  1945. +#define FLAG_RETRANS_DATA_ACKED 0x08 /* "" "" some of which was retransmitted. */
  1946. +#define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */
  1947. +#define FLAG_DATA_SACKED 0x20 /* New SACK. */
  1948. +#define FLAG_ECE 0x40 /* ECE in this ACK */
  1949. +#define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/
  1950. +#define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */
  1951. +#define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
  1952. +#define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */
  1953. +#define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */
  1954. +#define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */
  1955. +#define MPTCP_FLAG_DATA_ACKED 0x8000
  1956. +
  1957. +#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
  1958. +#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
  1959. +#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE)
  1960. +#define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED)
  1961. +
  1962. extern struct inet_timewait_death_row tcp_death_row;
  1963. /* sysctl variables for tcp */
  1964. @@ -349,6 +371,112 @@
  1965. #define TCP_ADD_STATS_USER(net, field, val) SNMP_ADD_STATS_USER((net)->mib.tcp_statistics, field, val)
  1966. #define TCP_ADD_STATS(net, field, val) SNMP_ADD_STATS((net)->mib.tcp_statistics, field, val)
  1967. +/**** START - Exports needed for MPTCP ****/
  1968. +extern const struct inet_connection_sock_af_ops ipv4_specific;
  1969. +extern const struct inet_connection_sock_af_ops ipv6_specific;
  1970. +extern const struct inet_connection_sock_af_ops ipv6_mapped;
  1971. +extern const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops;
  1972. +extern const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops;
  1973. +
  1974. +struct mptcp_options_received;
  1975. +
  1976. +int tcp_close_state(struct sock *sk);
  1977. +void tcp_push(struct sock *sk, int flags, int mss_now, int nonagle, int
  1978. + size_goal);
  1979. +void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now,
  1980. + const struct sk_buff *skb);
  1981. +int tcp_xmit_probe_skb(struct sock *sk, int urgent);
  1982. +void tcp_cwnd_validate(struct sock *sk);
  1983. +void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb);
  1984. +int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
  1985. + gfp_t gfp_mask);
  1986. +unsigned int tcp_mss_split_point(const struct sock *sk,
  1987. + const struct sk_buff *skb,
  1988. + unsigned int mss_now,
  1989. + unsigned int max_segs,
  1990. + int nonagle);
  1991. +bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb);
  1992. +bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb,
  1993. + unsigned int cur_mss, int nonagle);
  1994. +bool tcp_snd_wnd_test(const struct tcp_sock *tp, const struct sk_buff *skb,
  1995. + unsigned int cur_mss);
  1996. +unsigned int tcp_cwnd_test(const struct tcp_sock *tp, const struct sk_buff *skb);
  1997. +int tcp_mtu_probe(struct sock *sk);
  1998. +int tcp_init_tso_segs(const struct sock *sk, struct sk_buff *skb,
  1999. + unsigned int mss_now);
  2000. +void __pskb_trim_head(struct sk_buff *skb, int len);
  2001. +void tcp_queue_skb(struct sock *sk, struct sk_buff *skb);
  2002. +void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags);
  2003. +void tcp_reset(struct sock *sk);
  2004. +bool tcp_may_update_window(const struct tcp_sock *tp, const u32 ack,
  2005. + const u32 ack_seq, const u32 nwin);
  2006. +bool tcp_urg_mode(const struct tcp_sock *tp);
  2007. +void tcp_ack_probe(struct sock *sk);
  2008. +void tcp_rearm_rto(struct sock *sk);
  2009. +int tcp_write_timeout(struct sock *sk);
  2010. +bool retransmits_timed_out(struct sock *sk, unsigned int boundary,
  2011. + unsigned int timeout, bool syn_set);
  2012. +void tcp_write_err(struct sock *sk);
  2013. +void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr);
  2014. +void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
  2015. + unsigned int mss_now);
  2016. +
  2017. +int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req);
  2018. +void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
  2019. + struct request_sock *req);
  2020. +__u32 tcp_v4_init_sequence(const struct sk_buff *skb);
  2021. +int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
  2022. + struct request_sock *req,
  2023. + u16 queue_mapping);
  2024. +void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb);
  2025. +struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb);
  2026. +struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb);
  2027. +void tcp_v4_reqsk_destructor(struct request_sock *req);
  2028. +
  2029. +int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req);
  2030. +void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
  2031. + struct request_sock *req);
  2032. +__u32 tcp_v6_init_sequence(const struct sk_buff *skb);
  2033. +int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
  2034. + struct flowi6 *fl6, struct request_sock *req,
  2035. + u16 queue_mapping);
  2036. +void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
  2037. +int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
  2038. +int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
  2039. +void tcp_v6_destroy_sock(struct sock *sk);
  2040. +void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb);
  2041. +void tcp_v6_hash(struct sock *sk);
2042. +struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb);
  2043. +struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
  2044. + struct request_sock *req,
  2045. + struct dst_entry *dst);
  2046. +void tcp_v6_reqsk_destructor(struct request_sock *req);
  2047. +
  2048. +void sock_valbool_flag(struct sock *sk, int bit, int valbool);
  2049. +unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
  2050. + int large_allowed);
  2051. +u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb);
  2052. +
  2053. +void skb_clone_fraglist(struct sk_buff *skb);
  2054. +void copy_skb_header(struct sk_buff *new, const struct sk_buff *old);
  2055. +
  2056. +void inet_twsk_free(struct inet_timewait_sock *tw);
  2057. +/* These states need RST on ABORT according to RFC793 */
  2058. +static inline bool tcp_need_reset(int state)
  2059. +{
  2060. + return (1 << state) &
  2061. + (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 |
  2062. + TCPF_FIN_WAIT2 | TCPF_SYN_RECV);
  2063. +}
  2064. +
  2065. +bool tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb,
  2066. + int hlen);
  2067. +int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,
  2068. + bool *fragstolen);
  2069. +bool tcp_try_coalesce(struct sock *sk, struct sk_buff *to,
  2070. + struct sk_buff *from, bool *fragstolen);
  2071. +/**** END - Exports needed for MPTCP ****/
  2072. +
  2073. void tcp_tasklet_init(void);
  2074. void tcp_v4_err(struct sk_buff *skb, u32);
  2075. @@ -445,6 +573,7 @@
  2076. size_t len, int nonblock, int flags, int *addr_len);
  2077. void tcp_parse_options(const struct sk_buff *skb,
  2078. struct tcp_options_received *opt_rx,
  2079. + struct mptcp_options_received *mopt_rx,
  2080. int estab, struct tcp_fastopen_cookie *foc);
  2081. const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);
  2082. @@ -558,11 +687,15 @@
  2083. void tcp_send_loss_probe(struct sock *sk);
  2084. bool tcp_schedule_loss_probe(struct sock *sk);
  2085. +u16 tcp_select_window(struct sock *sk);
  2086. +
  2087. /* tcp_input.c */
  2088. void tcp_cwnd_application_limited(struct sock *sk);
  2089. void tcp_resume_early_retransmit(struct sock *sk);
  2090. void tcp_rearm_rto(struct sock *sk);
  2091. void tcp_reset(struct sock *sk);
  2092. +void tcp_set_rto(struct sock *sk);
  2093. +bool tcp_should_expand_sndbuf(const struct sock *sk);
  2094. /* tcp_timer.c */
  2095. void tcp_init_xmit_timers(struct sock *);
  2096. @@ -706,14 +839,24 @@
  2097. */
  2098. struct tcp_skb_cb {
  2099. union {
  2100. - struct inet_skb_parm h4;
  2101. + union {
  2102. + struct inet_skb_parm h4;
  2103. #if IS_ENABLED(CONFIG_IPV6)
  2104. - struct inet6_skb_parm h6;
  2105. + struct inet6_skb_parm h6;
  2106. +#endif
  2107. + } header; /* For incoming frames */
  2108. +#ifdef CONFIG_MPTCP
  2109. + __u32 path_mask; /* path indices that tried to send this skb */
  2110. #endif
  2111. - } header; /* For incoming frames */
  2112. + };
  2113. __u32 seq; /* Starting sequence number */
  2114. __u32 end_seq; /* SEQ + FIN + SYN + datalen */
  2115. __u32 when; /* used to compute rtt's */
  2116. +#ifdef CONFIG_MPTCP
  2117. + __u8 mptcp_flags; /* flags for the MPTCP layer */
  2118. + __u8 dss_off; /* Number of 4-byte words until
  2119. + * seq-number */
  2120. +#endif
  2121. __u8 tcp_flags; /* TCP header flags. (tcp[13]) */
  2122. __u8 sacked; /* State flags for SACK/FACK. */
  2123. @@ -1061,7 +1204,8 @@
  2124. /* Determine a window scaling and initial window to offer. */
  2125. void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd,
  2126. __u32 *window_clamp, int wscale_ok,
  2127. - __u8 *rcv_wscale, __u32 init_rcv_wnd);
  2128. + __u8 *rcv_wscale, __u32 init_rcv_wnd,
  2129. + const struct sock *sk);
  2130. static inline int tcp_win_from_space(int space)
  2131. {
  2132. @@ -1073,12 +1217,18 @@
  2133. /* Note: caller must be prepared to deal with negative returns */
  2134. static inline int tcp_space(const struct sock *sk)
  2135. {
  2136. + if (tcp_sk(sk)->mpc)
  2137. + sk = tcp_sk(sk)->meta_sk;
  2138. +
  2139. return tcp_win_from_space(sk->sk_rcvbuf -
  2140. atomic_read(&sk->sk_rmem_alloc));
  2141. }
  2142. static inline int tcp_full_space(const struct sock *sk)
  2143. {
  2144. + if (tcp_sk(sk)->mpc)
  2145. + sk = tcp_sk(sk)->meta_sk;
  2146. +
  2147. return tcp_win_from_space(sk->sk_rcvbuf);
  2148. }
  2149. @@ -1093,6 +1243,7 @@
  2150. tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
  2151. tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
  2152. tcp_rsk(req)->snt_synack = 0;
  2153. + tcp_rsk(req)->saw_mpc = 0;
  2154. req->mss = rx_opt->mss_clamp;
  2155. req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
  2156. ireq->tstamp_ok = rx_opt->tstamp_ok;
  2157. diff -Nur linux-3.14.45.orig/include/uapi/linux/if.h linux-3.14.45/include/uapi/linux/if.h
  2158. --- linux-3.14.45.orig/include/uapi/linux/if.h 2015-06-23 02:01:36.000000000 +0200
  2159. +++ linux-3.14.45/include/uapi/linux/if.h 2015-06-24 14:15:48.875862469 +0200
  2160. @@ -53,6 +53,9 @@
  2161. #define IFF_ECHO 0x40000 /* echo sent packets */
  2162. +#define IFF_NOMULTIPATH 0x80000 /* Disable for MPTCP */
  2163. +#define IFF_MPBACKUP 0x100000 /* Use as backup path for MPTCP */
  2164. +
  2165. #define IFF_VOLATILE (IFF_LOOPBACK|IFF_POINTOPOINT|IFF_BROADCAST|IFF_ECHO|\
  2166. IFF_MASTER|IFF_SLAVE|IFF_RUNNING|IFF_LOWER_UP|IFF_DORMANT)
  2167. diff -Nur linux-3.14.45.orig/include/uapi/linux/tcp.h linux-3.14.45/include/uapi/linux/tcp.h
  2168. --- linux-3.14.45.orig/include/uapi/linux/tcp.h 2015-06-23 02:01:36.000000000 +0200
  2169. +++ linux-3.14.45/include/uapi/linux/tcp.h 2015-06-24 14:15:48.875862469 +0200
  2170. @@ -112,6 +112,7 @@
  2171. #define TCP_FASTOPEN 23 /* Enable FastOpen on listeners */
  2172. #define TCP_TIMESTAMP 24
  2173. #define TCP_NOTSENT_LOWAT 25 /* limit number of unsent bytes in write queue */
  2174. +#define MPTCP_ENABLED 26
  2175. struct tcp_repair_opt {
  2176. __u32 opt_code;
  2177. diff -Nur linux-3.14.45.orig/net/Kconfig linux-3.14.45/net/Kconfig
  2178. --- linux-3.14.45.orig/net/Kconfig 2015-06-23 02:01:36.000000000 +0200
  2179. +++ linux-3.14.45/net/Kconfig 2015-06-24 14:15:48.875862469 +0200
  2180. @@ -79,6 +79,7 @@
  2181. source "net/ipv4/Kconfig"
  2182. source "net/ipv6/Kconfig"
  2183. source "net/netlabel/Kconfig"
  2184. +source "net/mptcp/Kconfig"
  2185. endif # if INET
  2186. diff -Nur linux-3.14.45.orig/net/Makefile linux-3.14.45/net/Makefile
  2187. --- linux-3.14.45.orig/net/Makefile 2015-06-23 02:01:36.000000000 +0200
  2188. +++ linux-3.14.45/net/Makefile 2015-06-24 14:15:48.875862469 +0200
  2189. @@ -20,6 +20,7 @@
  2190. obj-$(CONFIG_XFRM) += xfrm/
  2191. obj-$(CONFIG_UNIX) += unix/
  2192. obj-$(CONFIG_NET) += ipv6/
  2193. +obj-$(CONFIG_MPTCP) += mptcp/
  2194. obj-$(CONFIG_PACKET) += packet/
  2195. obj-$(CONFIG_NET_KEY) += key/
  2196. obj-$(CONFIG_BRIDGE) += bridge/
  2197. diff -Nur linux-3.14.45.orig/net/core/dev.c linux-3.14.45/net/core/dev.c
  2198. --- linux-3.14.45.orig/net/core/dev.c 2015-06-23 02:01:36.000000000 +0200
  2199. +++ linux-3.14.45/net/core/dev.c 2015-06-24 14:15:48.875862469 +0200
  2200. @@ -5399,7 +5399,7 @@
  2201. dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
  2202. IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
  2203. - IFF_AUTOMEDIA)) |
  2204. + IFF_AUTOMEDIA | IFF_NOMULTIPATH | IFF_MPBACKUP)) |
  2205. (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
  2206. IFF_ALLMULTI));
  2207. diff -Nur linux-3.14.45.orig/net/core/request_sock.c linux-3.14.45/net/core/request_sock.c
  2208. --- linux-3.14.45.orig/net/core/request_sock.c 2015-06-23 02:01:36.000000000 +0200
  2209. +++ linux-3.14.45/net/core/request_sock.c 2015-06-24 14:15:48.875862469 +0200
  2210. @@ -38,7 +38,8 @@
  2211. EXPORT_SYMBOL(sysctl_max_syn_backlog);
  2212. int reqsk_queue_alloc(struct request_sock_queue *queue,
  2213. - unsigned int nr_table_entries)
  2214. + unsigned int nr_table_entries,
  2215. + gfp_t flags)
  2216. {
  2217. size_t lopt_size = sizeof(struct listen_sock);
  2218. struct listen_sock *lopt;
  2219. @@ -48,9 +49,11 @@
  2220. nr_table_entries = roundup_pow_of_two(nr_table_entries + 1);
  2221. lopt_size += nr_table_entries * sizeof(struct request_sock *);
  2222. if (lopt_size > PAGE_SIZE)
  2223. - lopt = vzalloc(lopt_size);
  2224. + lopt = __vmalloc(lopt_size,
  2225. + flags | __GFP_HIGHMEM | __GFP_ZERO,
  2226. + PAGE_KERNEL);
  2227. else
  2228. - lopt = kzalloc(lopt_size, GFP_KERNEL);
  2229. + lopt = kzalloc(lopt_size, flags);
  2230. if (lopt == NULL)
  2231. return -ENOMEM;
  2232. diff -Nur linux-3.14.45.orig/net/core/skbuff.c linux-3.14.45/net/core/skbuff.c
  2233. --- linux-3.14.45.orig/net/core/skbuff.c 2015-06-23 02:01:36.000000000 +0200
  2234. +++ linux-3.14.45/net/core/skbuff.c 2015-06-24 14:15:48.875862469 +0200
  2235. @@ -491,7 +491,7 @@
  2236. skb_drop_list(&skb_shinfo(skb)->frag_list);
  2237. }
  2238. -static void skb_clone_fraglist(struct sk_buff *skb)
  2239. +void skb_clone_fraglist(struct sk_buff *skb)
  2240. {
  2241. struct sk_buff *list;
  2242. @@ -913,7 +913,7 @@
  2243. skb->inner_mac_header += off;
  2244. }
  2245. -static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
  2246. +void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
  2247. {
  2248. __copy_skb_header(new, old);
  2249. diff -Nur linux-3.14.45.orig/net/core/sock.c linux-3.14.45/net/core/sock.c
  2250. --- linux-3.14.45.orig/net/core/sock.c 2015-06-23 02:01:36.000000000 +0200
  2251. +++ linux-3.14.45/net/core/sock.c 2015-06-24 14:15:48.875862469 +0200
  2252. @@ -280,7 +280,7 @@
  2253. "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" ,
  2254. "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_MAX"
  2255. };
  2256. -static const char *const af_family_clock_key_strings[AF_MAX+1] = {
  2257. +char *const af_family_clock_key_strings[AF_MAX+1] = {
  2258. "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" ,
  2259. "clock-AF_AX25" , "clock-AF_IPX" , "clock-AF_APPLETALK",
  2260. "clock-AF_NETROM", "clock-AF_BRIDGE" , "clock-AF_ATMPVC" ,
  2261. @@ -301,7 +301,7 @@
  2262. * sk_callback_lock locking rules are per-address-family,
  2263. * so split the lock classes by using a per-AF key:
  2264. */
  2265. -static struct lock_class_key af_callback_keys[AF_MAX];
  2266. +struct lock_class_key af_callback_keys[AF_MAX];
  2267. /* Take into consideration the size of the struct sk_buff overhead in the
  2268. * determination of these values, since that is non-constant across
  2269. @@ -651,7 +651,7 @@
  2270. return ret;
  2271. }
  2272. -static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
  2273. +void sock_valbool_flag(struct sock *sk, int bit, int valbool)
  2274. {
  2275. if (valbool)
  2276. sock_set_flag(sk, bit);
  2277. @@ -1272,7 +1272,7 @@
  2278. *
  2279. * (We also register the sk_lock with the lock validator.)
  2280. */
  2281. -static inline void sock_lock_init(struct sock *sk)
  2282. +void sock_lock_init(struct sock *sk)
  2283. {
  2284. sock_lock_init_class_and_name(sk,
  2285. af_family_slock_key_strings[sk->sk_family],
  2286. @@ -1320,7 +1320,7 @@
  2287. }
  2288. EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls);
  2289. -static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
  2290. +struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
  2291. int family)
  2292. {
  2293. struct sock *sk;
  2294. @@ -2252,7 +2252,7 @@
  2295. rcu_read_unlock();
  2296. }
  2297. -static void sock_def_error_report(struct sock *sk)
  2298. +void sock_def_error_report(struct sock *sk)
  2299. {
  2300. struct socket_wq *wq;
  2301. diff -Nur linux-3.14.45.orig/net/ipv4/Kconfig linux-3.14.45/net/ipv4/Kconfig
  2302. --- linux-3.14.45.orig/net/ipv4/Kconfig 2015-06-23 02:01:36.000000000 +0200
  2303. +++ linux-3.14.45/net/ipv4/Kconfig 2015-06-24 14:15:48.875862469 +0200
  2304. @@ -556,6 +556,30 @@
  2305. For further details see:
  2306. http://www.ews.uiuc.edu/~shaoliu/tcpillinois/index.html
  2307. +config TCP_CONG_COUPLED
  2308. + tristate "MPTCP COUPLED CONGESTION CONTROL"
  2309. + depends on MPTCP
  2310. + default n
  2311. + ---help---
  2312. + MultiPath TCP Coupled Congestion Control
  2313. + To enable it, just put 'coupled' in tcp_congestion_control
  2314. +
  2315. +config TCP_CONG_OLIA
  2316. + tristate "MPTCP Opportunistic Linked Increase"
  2317. + depends on MPTCP
  2318. + default n
  2319. + ---help---
  2320. + MultiPath TCP Opportunistic Linked Increase Congestion Control
  2321. + To enable it, just put 'olia' in tcp_congestion_control
  2322. +
  2323. +config TCP_CONG_WVEGAS
  2324. + tristate "MPTCP WVEGAS CONGESTION CONTROL"
  2325. + depends on MPTCP
  2326. + default n
  2327. + ---help---
  2328. + wVegas congestion control for MPTCP
  2329. + To enable it, just put 'wvegas' in tcp_congestion_control
  2330. +
  2331. choice
  2332. prompt "Default TCP congestion control"
  2333. default DEFAULT_CUBIC
  2334. @@ -584,6 +608,15 @@
  2335. config DEFAULT_WESTWOOD
  2336. bool "Westwood" if TCP_CONG_WESTWOOD=y
  2337. + config DEFAULT_COUPLED
  2338. + bool "Coupled" if TCP_CONG_COUPLED=y
  2339. +
  2340. + config DEFAULT_OLIA
  2341. + bool "Olia" if TCP_CONG_OLIA=y
  2342. +
  2343. + config DEFAULT_WVEGAS
  2344. + bool "Wvegas" if TCP_CONG_WVEGAS=y
  2345. +
  2346. config DEFAULT_RENO
  2347. bool "Reno"
  2348. @@ -605,6 +638,8 @@
  2349. default "vegas" if DEFAULT_VEGAS
  2350. default "westwood" if DEFAULT_WESTWOOD
  2351. default "veno" if DEFAULT_VENO
  2352. + default "coupled" if DEFAULT_COUPLED
  2353. + default "wvegas" if DEFAULT_WVEGAS
  2354. default "reno" if DEFAULT_RENO
  2355. default "cubic"
  2356. diff -Nur linux-3.14.45.orig/net/ipv4/af_inet.c linux-3.14.45/net/ipv4/af_inet.c
  2357. --- linux-3.14.45.orig/net/ipv4/af_inet.c 2015-06-23 02:01:36.000000000 +0200
  2358. +++ linux-3.14.45/net/ipv4/af_inet.c 2015-06-24 14:15:48.875862469 +0200
  2359. @@ -104,6 +104,7 @@
  2360. #include <net/ip_fib.h>
  2361. #include <net/inet_connection_sock.h>
  2362. #include <net/tcp.h>
  2363. +#include <net/mptcp.h>
  2364. #include <net/udp.h>
  2365. #include <net/udplite.h>
  2366. #include <net/ping.h>
  2367. @@ -246,8 +247,7 @@
  2368. * Create an inet socket.
  2369. */
  2370. -static int inet_create(struct net *net, struct socket *sock, int protocol,
  2371. - int kern)
  2372. +int inet_create(struct net *net, struct socket *sock, int protocol, int kern)
  2373. {
  2374. struct sock *sk;
  2375. struct inet_protosw *answer;
  2376. @@ -679,6 +679,23 @@
  2377. lock_sock(sk2);
  2378. sock_rps_record_flow(sk2);
  2379. +
  2380. + if (sk2->sk_protocol == IPPROTO_TCP && tcp_sk(sk2)->mpc) {
  2381. + struct sock *sk_it = sk2;
  2382. +
  2383. + mptcp_for_each_sk(tcp_sk(sk2)->mpcb, sk_it)
  2384. + sock_rps_record_flow(sk_it);
  2385. +
  2386. + if (tcp_sk(sk2)->mpcb->master_sk) {
  2387. + sk_it = tcp_sk(sk2)->mpcb->master_sk;
  2388. +
  2389. + write_lock_bh(&sk_it->sk_callback_lock);
  2390. + sk_it->sk_wq = newsock->wq;
  2391. + sk_it->sk_socket = newsock;
  2392. + write_unlock_bh(&sk_it->sk_callback_lock);
  2393. + }
  2394. + }
  2395. +
  2396. WARN_ON(!((1 << sk2->sk_state) &
  2397. (TCPF_ESTABLISHED | TCPF_SYN_RECV |
  2398. TCPF_CLOSE_WAIT | TCPF_CLOSE)));
  2399. @@ -1770,6 +1787,9 @@
  2400. ip_init();
  2401. + /* We must initialize MPTCP before TCP. */
  2402. + mptcp_init();
  2403. +
  2404. tcp_v4_init();
  2405. /* Setup TCP slab cache for open requests. */
  2406. diff -Nur linux-3.14.45.orig/net/ipv4/inet_connection_sock.c linux-3.14.45/net/ipv4/inet_connection_sock.c
  2407. --- linux-3.14.45.orig/net/ipv4/inet_connection_sock.c 2015-06-23 02:01:36.000000000 +0200
  2408. +++ linux-3.14.45/net/ipv4/inet_connection_sock.c 2015-06-24 14:15:48.875862469 +0200
  2409. @@ -23,6 +23,7 @@
  2410. #include <net/route.h>
  2411. #include <net/tcp_states.h>
  2412. #include <net/xfrm.h>
  2413. +#include <net/mptcp.h>
  2414. #ifdef INET_CSK_DEBUG
  2415. const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
  2416. @@ -468,8 +469,8 @@
  2417. }
  2418. EXPORT_SYMBOL_GPL(inet_csk_route_child_sock);
  2419. -static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport,
  2420. - const u32 rnd, const u32 synq_hsize)
  2421. +u32 inet_synq_hash(const __be32 raddr, const __be16 rport, const u32 rnd,
  2422. + const u32 synq_hsize)
  2423. {
  2424. return jhash_2words((__force u32)raddr, (__force u32)rport, rnd) & (synq_hsize - 1);
  2425. }
  2426. @@ -667,7 +668,12 @@
  2427. const struct request_sock *req,
  2428. const gfp_t priority)
  2429. {
  2430. - struct sock *newsk = sk_clone_lock(sk, priority);
  2431. + struct sock *newsk;
  2432. +
  2433. + if (sk->sk_protocol == IPPROTO_TCP && tcp_sk(sk)->mpc)
  2434. + newsk = mptcp_sk_clone(sk, req->rsk_ops->family, priority);
  2435. + else
  2436. + newsk = sk_clone_lock(sk, priority);
  2437. if (newsk != NULL) {
  2438. struct inet_connection_sock *newicsk = inet_csk(newsk);
  2439. @@ -744,7 +750,8 @@
  2440. {
  2441. struct inet_sock *inet = inet_sk(sk);
  2442. struct inet_connection_sock *icsk = inet_csk(sk);
  2443. - int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries);
  2444. + int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries,
  2445. + GFP_KERNEL);
  2446. if (rc != 0)
  2447. return rc;
  2448. @@ -802,9 +809,14 @@
  2449. while ((req = acc_req) != NULL) {
  2450. struct sock *child = req->sk;
  2451. + bool mutex_taken = false;
  2452. acc_req = req->dl_next;
  2453. + if (is_meta_sk(child)) {
  2454. + mutex_lock(&tcp_sk(child)->mpcb->mpcb_mutex);
  2455. + mutex_taken = true;
  2456. + }
  2457. local_bh_disable();
  2458. bh_lock_sock(child);
  2459. WARN_ON(sock_owned_by_user(child));
  2460. @@ -833,6 +845,8 @@
  2461. bh_unlock_sock(child);
  2462. local_bh_enable();
  2463. + if (mutex_taken)
  2464. + mutex_unlock(&tcp_sk(child)->mpcb->mpcb_mutex);
  2465. sock_put(child);
  2466. sk_acceptq_removed(sk);
  2467. diff -Nur linux-3.14.45.orig/net/ipv4/syncookies.c linux-3.14.45/net/ipv4/syncookies.c
  2468. --- linux-3.14.45.orig/net/ipv4/syncookies.c 2015-06-23 02:01:36.000000000 +0200
  2469. +++ linux-3.14.45/net/ipv4/syncookies.c 2015-06-24 14:15:48.875862469 +0200
  2470. @@ -284,7 +284,7 @@
  2471. /* check for timestamp cookie support */
  2472. memset(&tcp_opt, 0, sizeof(tcp_opt));
  2473. - tcp_parse_options(skb, &tcp_opt, 0, NULL);
  2474. + tcp_parse_options(skb, &tcp_opt, NULL, 0, NULL);
  2475. if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok))
  2476. goto out;
  2477. @@ -354,10 +354,10 @@
  2478. /* Try to redo what tcp_v4_send_synack did. */
  2479. req->window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
  2480. - tcp_select_initial_window(tcp_full_space(sk), req->mss,
  2481. + tp->select_initial_window(tcp_full_space(sk), req->mss,
  2482. &req->rcv_wnd, &req->window_clamp,
  2483. ireq->wscale_ok, &rcv_wscale,
  2484. - dst_metric(&rt->dst, RTAX_INITRWND));
  2485. + dst_metric(&rt->dst, RTAX_INITRWND), sk);
  2486. ireq->rcv_wscale = rcv_wscale;
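The hunk above is the first of many call-site updates for the widened tcp_parse_options() signature, which gains a struct mptcp_options_received * argument between the TCP options struct and the estab flag. A minimal sketch of the calling convention the later hunks follow (the struct and helper names appear in those hunks; the surrounding code is illustrative only):

    struct tcp_options_received tmp_opt;
    struct mptcp_options_received mopt;

    tcp_clear_options(&tmp_opt);
    mptcp_init_mp_opt(&mopt);   /* callers interested in MPTCP options */
    tcp_parse_options(skb, &tmp_opt, &mopt, 0, NULL);

    /* callers with no MPTCP state to fill in (such as the syncookie
     * path above) simply pass NULL for the mopt argument: */
    tcp_parse_options(skb, &tcp_opt, NULL, 0, NULL);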
  2487. diff -Nur linux-3.14.45.orig/net/ipv4/tcp.c linux-3.14.45/net/ipv4/tcp.c
  2488. --- linux-3.14.45.orig/net/ipv4/tcp.c 2015-06-23 02:01:36.000000000 +0200
  2489. +++ linux-3.14.45/net/ipv4/tcp.c 2015-06-24 14:15:48.879862472 +0200
  2490. @@ -271,6 +271,7 @@
  2491. #include <net/icmp.h>
  2492. #include <net/inet_common.h>
  2493. +#include <net/mptcp.h>
  2494. #include <net/tcp.h>
  2495. #include <net/xfrm.h>
  2496. #include <net/ip.h>
  2497. @@ -419,6 +420,9 @@
  2498. sk->sk_sndbuf = sysctl_tcp_wmem[1];
  2499. sk->sk_rcvbuf = sysctl_tcp_rmem[1];
  2500. + /* Set function pointers in tcp_sock to tcp functions. */
  2501. + mptcp_init_tcp_sock(tp);
  2502. +
  2503. local_bh_disable();
  2504. sock_update_memcg(sk);
  2505. sk_sockets_allocated_inc(sk);
  2506. @@ -607,6 +611,8 @@
  2507. tcb->seq = tcb->end_seq = tp->write_seq;
  2508. tcb->tcp_flags = TCPHDR_ACK;
  2509. tcb->sacked = 0;
  2510. + if (tp->mpc)
  2511. + mptcp_skb_entail_init(tp, skb);
  2512. skb_header_release(skb);
  2513. tcp_add_write_queue_tail(sk, skb);
  2514. sk->sk_wmem_queued += skb->truesize;
  2515. @@ -640,8 +646,8 @@
  2516. atomic_read(&sk->sk_wmem_alloc) > skb->truesize;
  2517. }
  2518. -static void tcp_push(struct sock *sk, int flags, int mss_now,
  2519. - int nonagle, int size_goal)
  2520. +void tcp_push(struct sock *sk, int flags, int mss_now, int nonagle,
  2521. + int size_goal)
  2522. {
  2523. struct tcp_sock *tp = tcp_sk(sk);
  2524. struct sk_buff *skb;
  2525. @@ -726,6 +732,14 @@
  2526. int ret;
  2527. sock_rps_record_flow(sk);
  2528. +
  2529. +#ifdef CONFIG_MPTCP
  2530. + if (tcp_sk(sk)->mpc) {
  2531. + struct sock *sk_it;
  2532. + mptcp_for_each_sk(tcp_sk(sk)->mpcb, sk_it)
  2533. + sock_rps_record_flow(sk_it);
  2534. + }
  2535. +#endif
  2536. /*
  2537. * We can't seek on a socket input
  2538. */
  2539. @@ -821,8 +835,7 @@
  2540. return NULL;
  2541. }
  2542. -static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
  2543. - int large_allowed)
  2544. +unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, int large_allowed)
  2545. {
  2546. struct tcp_sock *tp = tcp_sk(sk);
  2547. u32 xmit_size_goal, old_size_goal;
  2548. @@ -872,8 +885,13 @@
  2549. {
  2550. int mss_now;
  2551. - mss_now = tcp_current_mss(sk);
  2552. - *size_goal = tcp_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB));
  2553. + if (tcp_sk(sk)->mpc) {
  2554. + mss_now = mptcp_current_mss(sk);
  2555. + *size_goal = mptcp_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB));
  2556. + } else {
  2557. + mss_now = tcp_current_mss(sk);
  2558. + *size_goal = tcp_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB));
  2559. + }
  2560. return mss_now;
  2561. }
  2562. @@ -897,6 +915,26 @@
  2563. goto out_err;
  2564. }
  2565. + if (tp->mpc) {
  2566. + struct sock *sk_it = sk;
  2567. +
  2568. + /* We must check this with the socket lock held, because we iterate
  2569. + * over the subflows.
  2570. + */
  2571. + if (!mptcp_can_sendpage(sk)) {
  2572. + ssize_t ret;
  2573. +
  2574. + release_sock(sk);
  2575. + ret = sock_no_sendpage(sk->sk_socket, page, offset,
  2576. + size, flags);
  2577. + lock_sock(sk);
  2578. + return ret;
  2579. + }
  2580. +
  2581. + mptcp_for_each_sk(tp->mpcb, sk_it)
  2582. + sock_rps_record_flow(sk_it);
  2583. + }
  2584. +
  2585. clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
  2586. mss_now = tcp_send_mss(sk, &size_goal, flags);
  2587. @@ -1001,8 +1039,9 @@
  2588. {
  2589. ssize_t res;
  2590. - if (!(sk->sk_route_caps & NETIF_F_SG) ||
  2591. - !(sk->sk_route_caps & NETIF_F_ALL_CSUM))
  2592. + /* If MPTCP is enabled, this check is done later, once the connection is established */
  2593. + if (!tcp_sk(sk)->mpc && (!(sk->sk_route_caps & NETIF_F_SG) ||
  2594. + !(sk->sk_route_caps & NETIF_F_ALL_CSUM)))
  2595. return sock_no_sendpage(sk->sk_socket, page, offset, size,
  2596. flags);
  2597. @@ -1018,6 +1057,9 @@
  2598. const struct tcp_sock *tp = tcp_sk(sk);
  2599. int tmp = tp->mss_cache;
  2600. + if (tp->mpc)
  2601. + return mptcp_select_size(sk, sg);
  2602. +
  2603. if (sg) {
  2604. if (sk_can_gso(sk)) {
  2605. /* Small frames wont use a full page:
  2606. @@ -1105,6 +1147,12 @@
  2607. goto do_error;
  2608. }
  2609. + if (tp->mpc) {
  2610. + struct sock *sk_it = sk;
  2611. + mptcp_for_each_sk(tp->mpcb, sk_it)
  2612. + sock_rps_record_flow(sk_it);
  2613. + }
  2614. +
  2615. if (unlikely(tp->repair)) {
  2616. if (tp->repair_queue == TCP_RECV_QUEUE) {
  2617. copied = tcp_send_rcvq(sk, msg, size);
  2618. @@ -1132,7 +1180,10 @@
  2619. if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
  2620. goto out_err;
  2621. - sg = !!(sk->sk_route_caps & NETIF_F_SG);
  2622. + if (tp->mpc)
  2623. + sg = mptcp_can_sg(sk);
  2624. + else
  2625. + sg = !!(sk->sk_route_caps & NETIF_F_SG);
  2626. while (--iovlen >= 0) {
  2627. size_t seglen = iov->iov_len;
  2628. @@ -1176,8 +1227,15 @@
  2629. /*
  2630. * Check whether we can use HW checksum.
  2631. + *
  2632. + * If the DSS checksum is enabled, we do not use hardware checksumming.
  2633. + * For plain TCP we check the
  2634. + * device capabilities.
  2635. + * For MPTCP, hardware checksums are handled
  2636. + * later in mptcp_write_xmit.
  2637. */
  2638. - if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
  2639. + if (((tp->mpc && !tp->mpcb->dss_csum) || !tp->mpc) &&
  2640. + (tp->mpc || sk->sk_route_caps & NETIF_F_ALL_CSUM))
  2641. skb->ip_summed = CHECKSUM_PARTIAL;
  2642. skb_entail(sk, skb);
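Spelled out, the combined checksum condition added above behaves as follows; this is only a reading of the boolean expression in the hunk, not a statement taken from MPTCP documentation:

    /*
     *  tp->mpc  dss_csum  NETIF_F_ALL_CSUM   skb->ip_summed
     *  0        -         no                 left as-is (no HW csum)
     *  0        -         yes                CHECKSUM_PARTIAL
     *  1        yes       -                  left as-is (DSS csum in software)
     *  1        no        -                  CHECKSUM_PARTIAL (device caps are
     *                                        re-checked later in mptcp_write_xmit)
     */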
  2643. @@ -1386,6 +1444,11 @@
  2644. struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
  2645. + if (is_meta_sk(sk)) {
  2646. + mptcp_cleanup_rbuf(sk, copied);
  2647. + return;
  2648. + }
  2649. +
  2650. WARN(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq),
  2651. "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n",
  2652. tp->copied_seq, TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt);
  2653. @@ -1422,7 +1485,7 @@
  2654. /* Optimize, __tcp_select_window() is not cheap. */
  2655. if (2*rcv_window_now <= tp->window_clamp) {
  2656. - __u32 new_window = __tcp_select_window(sk);
  2657. + __u32 new_window = tp->__select_window(sk);
  2658. /* Send ACK now, if this read freed lots of space
  2659. * in our buffer. Certainly, new_window is new window.
  2660. @@ -1623,6 +1686,14 @@
  2661. lock_sock(sk);
  2662. +#ifdef CONFIG_MPTCP
  2663. + if (tp->mpc) {
  2664. + struct sock *sk_it;
  2665. + mptcp_for_each_sk(tp->mpcb, sk_it)
  2666. + sock_rps_record_flow(sk_it);
  2667. + }
  2668. +#endif
  2669. +
  2670. err = -ENOTCONN;
  2671. if (sk->sk_state == TCP_LISTEN)
  2672. goto out;
  2673. @@ -2070,7 +2141,7 @@
  2674. /* TCP_CLOSING */ TCP_CLOSING,
  2675. };
  2676. -static int tcp_close_state(struct sock *sk)
  2677. +int tcp_close_state(struct sock *sk)
  2678. {
  2679. int next = (int)new_state[sk->sk_state];
  2680. int ns = next & TCP_STATE_MASK;
  2681. @@ -2099,8 +2170,12 @@
  2682. (TCPF_ESTABLISHED | TCPF_SYN_SENT |
  2683. TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) {
  2684. /* Clear out any half completed packets. FIN if needed. */
  2685. - if (tcp_close_state(sk))
  2686. - tcp_send_fin(sk);
  2687. + if (tcp_close_state(sk)) {
  2688. + if (!is_meta_sk(sk))
  2689. + tcp_send_fin(sk);
  2690. + else
  2691. + mptcp_send_fin(sk);
  2692. + }
  2693. }
  2694. }
  2695. EXPORT_SYMBOL(tcp_shutdown);
  2696. @@ -2125,6 +2200,11 @@
  2697. int data_was_unread = 0;
  2698. int state;
  2699. + if (is_meta_sk(sk)) {
  2700. + mptcp_close(sk, timeout);
  2701. + return;
  2702. + }
  2703. +
  2704. lock_sock(sk);
  2705. sk->sk_shutdown = SHUTDOWN_MASK;
  2706. @@ -2291,15 +2371,6 @@
  2707. }
  2708. EXPORT_SYMBOL(tcp_close);
  2709. -/* These states need RST on ABORT according to RFC793 */
  2710. -
  2711. -static inline bool tcp_need_reset(int state)
  2712. -{
  2713. - return (1 << state) &
  2714. - (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 |
  2715. - TCPF_FIN_WAIT2 | TCPF_SYN_RECV);
  2716. -}
  2717. -
  2718. int tcp_disconnect(struct sock *sk, int flags)
  2719. {
  2720. struct inet_sock *inet = inet_sk(sk);
  2721. @@ -2340,6 +2411,13 @@
  2722. if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
  2723. inet_reset_saddr(sk);
  2724. + if (is_meta_sk(sk)) {
  2725. + mptcp_disconnect(sk);
  2726. + } else {
  2727. + if (tp->inside_tk_table)
  2728. + mptcp_hash_remove_bh(tp);
  2729. + }
  2730. +
  2731. sk->sk_shutdown = 0;
  2732. sock_reset_flag(sk, SOCK_DONE);
  2733. tp->srtt = 0;
  2734. @@ -2699,6 +2777,18 @@
  2735. tp->notsent_lowat = val;
  2736. sk->sk_write_space(sk);
  2737. break;
  2738. +#ifdef CONFIG_MPTCP
  2739. + case MPTCP_ENABLED:
  2740. + if (sk->sk_state == TCP_CLOSE || sk->sk_state == TCP_LISTEN) {
  2741. + if (val)
  2742. + tp->mptcp_enabled = 1;
  2743. + else
  2744. + tp->mptcp_enabled = 0;
  2745. + } else {
  2746. + err = -EPERM;
  2747. + }
  2748. + break;
  2749. +#endif
  2750. default:
  2751. err = -ENOPROTOOPT;
  2752. break;
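Since the MPTCP_ENABLED case above only accepts the option while the socket is in TCP_CLOSE or TCP_LISTEN (returning -EPERM otherwise), a per-socket opt-in from user space would look roughly like the fragment below. This is a hedged sketch: fd is assumed to be an unconnected TCP socket, and the MPTCP_ENABLED constant comes from a uapi header added elsewhere in the patch, not from this hunk.

    int one = 1;
    socklen_t len = sizeof(one);

    /* level IPPROTO_TCP, because the handler lives in do_tcp_setsockopt();
     * must be called while the socket is still closed or listening. */
    if (setsockopt(fd, IPPROTO_TCP, MPTCP_ENABLED, &one, sizeof(one)) < 0)
        perror("setsockopt(MPTCP_ENABLED)");

    /* read back through the matching getsockopt case added further down */
    getsockopt(fd, IPPROTO_TCP, MPTCP_ENABLED, &one, &len);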
  2753. @@ -2918,6 +3008,11 @@
  2754. case TCP_NOTSENT_LOWAT:
  2755. val = tp->notsent_lowat;
  2756. break;
  2757. +#ifdef CONFIG_MPTCP
  2758. + case MPTCP_ENABLED:
  2759. + val = tp->mptcp_enabled;
  2760. + break;
  2761. +#endif
  2762. default:
  2763. return -ENOPROTOOPT;
  2764. }
  2765. @@ -3088,8 +3183,11 @@
  2766. if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
  2767. TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
  2768. + WARN_ON(sk->sk_state == TCP_CLOSE);
  2769. tcp_set_state(sk, TCP_CLOSE);
  2770. +
  2771. tcp_clear_xmit_timers(sk);
  2772. +
  2773. if (req != NULL)
  2774. reqsk_fastopen_remove(sk, req, false);
  2775. diff -Nur linux-3.14.45.orig/net/ipv4/tcp_input.c linux-3.14.45/net/ipv4/tcp_input.c
  2776. --- linux-3.14.45.orig/net/ipv4/tcp_input.c 2015-06-23 02:01:36.000000000 +0200
  2777. +++ linux-3.14.45/net/ipv4/tcp_input.c 2015-06-24 14:15:48.883862476 +0200
  2778. @@ -74,6 +74,9 @@
  2779. #include <linux/ipsec.h>
  2780. #include <asm/unaligned.h>
  2781. #include <net/netdma.h>
  2782. +#include <net/mptcp.h>
  2783. +#include <net/mptcp_v4.h>
  2784. +#include <net/mptcp_v6.h>
  2785. int sysctl_tcp_timestamps __read_mostly = 1;
  2786. int sysctl_tcp_window_scaling __read_mostly = 1;
  2787. @@ -99,25 +102,6 @@
  2788. int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
  2789. int sysctl_tcp_early_retrans __read_mostly = 3;
  2790. -#define FLAG_DATA 0x01 /* Incoming frame contained data. */
  2791. -#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
  2792. -#define FLAG_DATA_ACKED 0x04 /* This ACK acknowledged new data. */
  2793. -#define FLAG_RETRANS_DATA_ACKED 0x08 /* "" "" some of which was retransmitted. */
  2794. -#define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */
  2795. -#define FLAG_DATA_SACKED 0x20 /* New SACK. */
  2796. -#define FLAG_ECE 0x40 /* ECE in this ACK */
  2797. -#define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/
  2798. -#define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */
  2799. -#define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
  2800. -#define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */
  2801. -#define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */
  2802. -#define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */
  2803. -
  2804. -#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
  2805. -#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
  2806. -#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE)
  2807. -#define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED)
  2808. -
  2809. #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
  2810. #define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))
  2811. @@ -283,8 +267,12 @@
  2812. per_mss = roundup_pow_of_two(per_mss) +
  2813. SKB_DATA_ALIGN(sizeof(struct sk_buff));
  2814. - nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd);
  2815. - nr_segs = max_t(u32, nr_segs, tp->reordering + 1);
  2816. + if (tp->mpc) {
  2817. + nr_segs = mptcp_check_snd_buf(tp);
  2818. + } else {
  2819. + nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd);
  2820. + nr_segs = max_t(u32, nr_segs, tp->reordering + 1);
  2821. + }
  2822. /* Fast Recovery (RFC 5681 3.2) :
  2823. * Cubic needs 1.7 factor, rounded to 2 to include
  2824. @@ -292,8 +280,16 @@
  2825. */
  2826. sndmem = 2 * nr_segs * per_mss;
  2827. - if (sk->sk_sndbuf < sndmem)
  2828. + /* MPTCP: after this, sndmem is the new contribution of the
  2829. + * current subflow to the aggregated sndbuf. */
  2830. + if (sk->sk_sndbuf < sndmem) {
  2831. + int old_sndbuf = sk->sk_sndbuf;
  2832. sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
  2833. + /* MPTCP: OK, the subflow sndbuf has grown; reflect
  2834. + * this in the aggregate buffer. */
  2835. + if (tp->mpc && old_sndbuf != sk->sk_sndbuf)
  2836. + mptcp_update_sndbuf(tp->mpcb);
  2837. + }
  2838. }
  2839. /* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
  2840. @@ -342,10 +338,12 @@
  2841. static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
  2842. {
  2843. struct tcp_sock *tp = tcp_sk(sk);
  2844. + struct sock *meta_sk = tp->mpc ? mptcp_meta_sk(sk) : sk;
  2845. + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
  2846. /* Check #1 */
  2847. - if (tp->rcv_ssthresh < tp->window_clamp &&
  2848. - (int)tp->rcv_ssthresh < tcp_space(sk) &&
  2849. + if (meta_tp->rcv_ssthresh < meta_tp->window_clamp &&
  2850. + (int)meta_tp->rcv_ssthresh < tcp_space(sk) &&
  2851. !sk_under_memory_pressure(sk)) {
  2852. int incr;
  2853. @@ -353,14 +351,14 @@
  2854. * will fit to rcvbuf in future.
  2855. */
  2856. if (tcp_win_from_space(skb->truesize) <= skb->len)
  2857. - incr = 2 * tp->advmss;
  2858. + incr = 2 * meta_tp->advmss;
  2859. else
  2860. - incr = __tcp_grow_window(sk, skb);
  2861. + incr = __tcp_grow_window(meta_sk, skb);
  2862. if (incr) {
  2863. incr = max_t(int, incr, 2 * skb->len);
  2864. - tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr,
  2865. - tp->window_clamp);
  2866. + meta_tp->rcv_ssthresh = min(meta_tp->rcv_ssthresh + incr,
  2867. + meta_tp->window_clamp);
  2868. inet_csk(sk)->icsk_ack.quick |= 1;
  2869. }
  2870. }
  2871. @@ -543,7 +541,10 @@
  2872. int copied;
  2873. time = tcp_time_stamp - tp->rcvq_space.time;
  2874. - if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0)
  2875. + if (tp->mpc) {
  2876. + if (mptcp_check_rtt(tp, time))
  2877. + return;
  2878. + } else if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0)
  2879. return;
  2880. /* Number of bytes copied to user in last RTT */
  2881. @@ -768,7 +769,7 @@
  2882. /* Calculate rto without backoff. This is the second half of Van Jacobson's
  2883. * routine referred to above.
  2884. */
  2885. -static void tcp_set_rto(struct sock *sk)
  2886. +void tcp_set_rto(struct sock *sk)
  2887. {
  2888. const struct tcp_sock *tp = tcp_sk(sk);
  2889. /* Old crap is replaced with new one. 8)
  2890. @@ -2909,7 +2910,7 @@
  2891. return false;
  2892. tcp_rtt_estimator(sk, seq_rtt);
  2893. - tcp_set_rto(sk);
  2894. + tp->set_rto(sk);
  2895. /* RFC6298: only reset backoff on valid RTT measurement. */
  2896. inet_csk(sk)->icsk_backoff = 0;
  2897. @@ -2993,7 +2994,7 @@
  2898. }
  2899. /* If we get here, the whole TSO packet has not been acked. */
  2900. -static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
  2901. +u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
  2902. {
  2903. struct tcp_sock *tp = tcp_sk(sk);
  2904. u32 packets_acked;
  2905. @@ -3088,6 +3089,8 @@
  2906. */
  2907. if (!(scb->tcp_flags & TCPHDR_SYN)) {
  2908. flag |= FLAG_DATA_ACKED;
  2909. + if (tp->mpc && mptcp_is_data_seq(skb))
  2910. + flag |= MPTCP_FLAG_DATA_ACKED;
  2911. } else {
  2912. flag |= FLAG_SYN_ACKED;
  2913. tp->retrans_stamp = 0;
  2914. @@ -3190,7 +3193,7 @@
  2915. return flag;
  2916. }
  2917. -static void tcp_ack_probe(struct sock *sk)
  2918. +void tcp_ack_probe(struct sock *sk)
  2919. {
  2920. const struct tcp_sock *tp = tcp_sk(sk);
  2921. struct inet_connection_sock *icsk = inet_csk(sk);
  2922. @@ -3237,9 +3240,8 @@
  2923. /* Check that window update is acceptable.
  2924. * The function assumes that snd_una<=ack<=snd_next.
  2925. */
  2926. -static inline bool tcp_may_update_window(const struct tcp_sock *tp,
  2927. - const u32 ack, const u32 ack_seq,
  2928. - const u32 nwin)
  2929. +bool tcp_may_update_window(const struct tcp_sock *tp, const u32 ack,
  2930. + const u32 ack_seq, const u32 nwin)
  2931. {
  2932. return after(ack, tp->snd_una) ||
  2933. after(ack_seq, tp->snd_wl1) ||
  2934. @@ -3358,7 +3360,7 @@
  2935. }
  2936. /* This routine deals with incoming acks, but not outgoing ones. */
  2937. -static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
  2938. +static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
  2939. {
  2940. struct inet_connection_sock *icsk = inet_csk(sk);
  2941. struct tcp_sock *tp = tcp_sk(sk);
  2942. @@ -3453,6 +3455,16 @@
  2943. flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, sack_rtt);
  2944. acked -= tp->packets_out;
  2945. + if (tp->mpc) {
  2946. + if (mptcp_fallback_infinite(sk, flag)) {
  2947. + pr_err("%s resetting flow\n", __func__);
  2948. + mptcp_send_reset(sk);
  2949. + goto invalid_ack;
  2950. + }
  2951. +
  2952. + mptcp_clean_rtx_infinite(skb, sk);
  2953. + }
  2954. +
  2955. /* Advance cwnd if state allows */
  2956. if (tcp_may_raise_cwnd(sk, flag))
  2957. tcp_cong_avoid(sk, ack, acked, prior_in_flight);
  2958. @@ -3517,8 +3529,9 @@
  2959. * the fast version below fails.
  2960. */
  2961. void tcp_parse_options(const struct sk_buff *skb,
  2962. - struct tcp_options_received *opt_rx, int estab,
  2963. - struct tcp_fastopen_cookie *foc)
  2964. + struct tcp_options_received *opt_rx,
  2965. + struct mptcp_options_received *mopt,
  2966. + int estab, struct tcp_fastopen_cookie *foc)
  2967. {
  2968. const unsigned char *ptr;
  2969. const struct tcphdr *th = tcp_hdr(skb);
  2970. @@ -3601,6 +3614,10 @@
  2971. */
  2972. break;
  2973. #endif
  2974. + case TCPOPT_MPTCP:
  2975. + mptcp_parse_options(ptr - 2, opsize, opt_rx,
  2976. + mopt, skb);
  2977. + break;
  2978. case TCPOPT_EXP:
  2979. /* Fast Open option shares code 254 using a
  2980. * 16 bits magic number. It's valid only in
  2981. @@ -3662,8 +3679,8 @@
  2982. if (tcp_parse_aligned_timestamp(tp, th))
  2983. return true;
  2984. }
  2985. -
  2986. - tcp_parse_options(skb, &tp->rx_opt, 1, NULL);
  2987. + tcp_parse_options(skb, &tp->rx_opt, tp->mpc ? &tp->mptcp->rx_opt : NULL,
  2988. + 1, NULL);
  2989. if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
  2990. tp->rx_opt.rcv_tsecr -= tp->tsoffset;
  2991. @@ -3836,6 +3853,8 @@
  2992. dst = __sk_dst_get(sk);
  2993. if (!dst || !dst_metric(dst, RTAX_QUICKACK))
  2994. inet_csk(sk)->icsk_ack.pingpong = 1;
  2995. + if (tp->mpc)
  2996. + mptcp_sub_close_passive(sk);
  2997. break;
  2998. case TCP_CLOSE_WAIT:
  2999. @@ -3857,6 +3876,13 @@
  3000. tcp_set_state(sk, TCP_CLOSING);
  3001. break;
  3002. case TCP_FIN_WAIT2:
  3003. + if (tp->mpc) {
  3004. + /* The socket will get closed by mptcp_data_ready.
  3005. + * We first have to process all data-sequences.
  3006. + */
  3007. + tp->close_it = 1;
  3008. + break;
  3009. + }
  3010. /* Received a FIN -- send ACK and enter TIME_WAIT. */
  3011. tcp_send_ack(sk);
  3012. tcp_time_wait(sk, TCP_TIME_WAIT, 0);
  3013. @@ -3881,6 +3907,10 @@
  3014. if (!sock_flag(sk, SOCK_DEAD)) {
  3015. sk->sk_state_change(sk);
  3016. + /* Don't wake up MPTCP-subflows */
  3017. + if (tp->mpc)
  3018. + return;
  3019. +
  3020. /* Do not send POLL_HUP for half duplex close. */
  3021. if (sk->sk_shutdown == SHUTDOWN_MASK ||
  3022. sk->sk_state == TCP_CLOSE)
  3023. @@ -4078,7 +4108,11 @@
  3024. tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
  3025. }
  3026. - if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
  3027. + /* In case of MPTCP, the segment may be empty if it's a
  3028. + * non-data DATA_FIN. (see beginning of tcp_data_queue)
  3029. + */
  3030. + if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt) &&
  3031. + !(tp->mpc && TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq)) {
  3032. SOCK_DEBUG(sk, "ofo packet was already received\n");
  3033. __skb_unlink(skb, &tp->out_of_order_queue);
  3034. __kfree_skb(skb);
  3035. @@ -4102,6 +4136,9 @@
  3036. static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
  3037. unsigned int size)
  3038. {
  3039. + if (tcp_sk(sk)->mpc)
  3040. + sk = mptcp_meta_sk(sk);
  3041. +
  3042. if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
  3043. !sk_rmem_schedule(sk, skb, size)) {
  3044. @@ -4132,15 +4169,16 @@
  3045. * Better try to coalesce them right now to avoid future collapses.
  3046. * Returns true if caller should free @from instead of queueing it
  3047. */
  3048. -static bool tcp_try_coalesce(struct sock *sk,
  3049. - struct sk_buff *to,
  3050. - struct sk_buff *from,
  3051. - bool *fragstolen)
  3052. +bool tcp_try_coalesce(struct sock *sk, struct sk_buff *to, struct sk_buff *from,
  3053. + bool *fragstolen)
  3054. {
  3055. int delta;
  3056. *fragstolen = false;
  3057. + if (tcp_sk(sk)->mpc && !is_meta_sk(sk))
  3058. + return false;
  3059. +
  3060. if (tcp_hdr(from)->fin)
  3061. return false;
  3062. @@ -4230,7 +4268,9 @@
  3063. /* Do skb overlap to previous one? */
  3064. if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
  3065. - if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
  3066. + /* MPTCP allows a non-data DATA_FIN to be in the ofo-queue */
  3067. + if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq) &&
  3068. + !(tp->mpc && end_seq == seq)) {
  3069. /* All the bits are present. Drop. */
  3070. NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
  3071. __kfree_skb(skb);
  3072. @@ -4268,6 +4308,9 @@
  3073. end_seq);
  3074. break;
  3075. }
  3076. + /* MPTCP allows a non-data DATA_FIN to be in the ofo-queue */
  3077. + if (tp->mpc && TCP_SKB_CB(skb1)->seq == TCP_SKB_CB(skb1)->end_seq)
  3078. + continue;
  3079. __skb_unlink(skb1, &tp->out_of_order_queue);
  3080. tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
  3081. TCP_SKB_CB(skb1)->end_seq);
  3082. @@ -4285,8 +4328,8 @@
  3083. }
  3084. }
  3085. -static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,
  3086. - bool *fragstolen)
  3087. +int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,
  3088. + bool *fragstolen)
  3089. {
  3090. int eaten;
  3091. struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue);
  3092. @@ -4348,7 +4391,10 @@
  3093. int eaten = -1;
  3094. bool fragstolen = false;
  3095. - if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq)
  3096. + /* If no data is present, but a data_fin is in the options, we still
  3097. + * have to call mptcp_queue_skb later on. */
  3098. + if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq &&
  3099. + !(tp->mpc && mptcp_is_data_fin(skb)))
  3100. goto drop;
  3101. skb_dst_drop(skb);
  3102. @@ -4394,7 +4440,7 @@
  3103. eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
  3104. }
  3105. tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
  3106. - if (skb->len)
  3107. + if (skb->len || mptcp_is_data_fin(skb))
  3108. tcp_event_data_recv(sk, skb);
  3109. if (th->fin)
  3110. tcp_fin(sk);
  3111. @@ -4416,7 +4462,11 @@
  3112. if (eaten > 0)
  3113. kfree_skb_partial(skb, fragstolen);
  3114. - if (!sock_flag(sk, SOCK_DEAD))
  3115. + if (!sock_flag(sk, SOCK_DEAD) || tp->mpc)
  3116. + /* MPTCP: we always have to call data_ready, because
  3117. + * we may be about to receive a data-fin, which still
  3118. + * must get queued.
  3119. + */
  3120. sk->sk_data_ready(sk, 0);
  3121. return;
  3122. }
  3123. @@ -4468,6 +4518,8 @@
  3124. next = skb_queue_next(list, skb);
  3125. __skb_unlink(skb, list);
  3126. + if (tcp_sk(sk)->mpc)
  3127. + mptcp_remove_shortcuts(tcp_sk(sk)->mpcb, skb);
  3128. __kfree_skb(skb);
  3129. NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
  3130. @@ -4640,6 +4692,18 @@
  3131. struct tcp_sock *tp = tcp_sk(sk);
  3132. bool res = false;
  3133. + if (is_meta_sk(sk)) {
  3134. + if (!skb_queue_empty(&tp->out_of_order_queue)) {
  3135. + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_OFOPRUNED);
  3136. + mptcp_purge_ofo_queue(tp);
  3137. +
  3138. + /* No SACK at the MPTCP level */
  3139. + sk_mem_reclaim(sk);
  3140. + res = true;
  3141. + }
  3142. + return res;
  3143. + }
  3144. +
  3145. if (!skb_queue_empty(&tp->out_of_order_queue)) {
  3146. NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_OFOPRUNED);
  3147. __skb_queue_purge(&tp->out_of_order_queue);
  3148. @@ -4729,7 +4793,7 @@
  3149. tp->snd_cwnd_stamp = tcp_time_stamp;
  3150. }
  3151. -static bool tcp_should_expand_sndbuf(const struct sock *sk)
  3152. +bool tcp_should_expand_sndbuf(const struct sock *sk)
  3153. {
  3154. const struct tcp_sock *tp = tcp_sk(sk);
  3155. @@ -4764,7 +4828,7 @@
  3156. {
  3157. struct tcp_sock *tp = tcp_sk(sk);
  3158. - if (tcp_should_expand_sndbuf(sk)) {
  3159. + if (tp->should_expand_sndbuf(sk)) {
  3160. tcp_sndbuf_expand(sk);
  3161. tp->snd_cwnd_stamp = tcp_time_stamp;
  3162. }
  3163. @@ -4776,8 +4840,9 @@
  3164. {
  3165. if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) {
  3166. sock_reset_flag(sk, SOCK_QUEUE_SHRUNK);
  3167. - if (sk->sk_socket &&
  3168. - test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
  3169. + if (tcp_sk(sk)->mpc ||
  3170. + (sk->sk_socket &&
  3171. + test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)))
  3172. tcp_new_space(sk);
  3173. }
  3174. }
  3175. @@ -4800,7 +4865,7 @@
  3176. /* ... and right edge of window advances far enough.
  3177. * (tcp_recvmsg() will send ACK otherwise). Or...
  3178. */
  3179. - __tcp_select_window(sk) >= tp->rcv_wnd) ||
  3180. + tp->__select_window(sk) >= tp->rcv_wnd) ||
  3181. /* We ACK each frame or... */
  3182. tcp_in_quickack_mode(sk) ||
  3183. /* We have out of order data. */
  3184. @@ -4902,6 +4967,10 @@
  3185. {
  3186. struct tcp_sock *tp = tcp_sk(sk);
  3187. + /* MPTCP urgent data is not yet supported */
  3188. + if (tp->mpc)
  3189. + return;
  3190. +
  3191. /* Check if we get a new urgent pointer - normally not. */
  3192. if (th->urg)
  3193. tcp_check_urg(sk, th);
  3194. @@ -4969,8 +5038,7 @@
  3195. }
  3196. #ifdef CONFIG_NET_DMA
  3197. -static bool tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb,
  3198. - int hlen)
  3199. +bool tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen)
  3200. {
  3201. struct tcp_sock *tp = tcp_sk(sk);
  3202. int chunk = skb->len - hlen;
  3203. @@ -5079,9 +5147,15 @@
  3204. goto discard;
  3205. }
  3206. + /* If valid: post-process the received MPTCP options. */
  3207. + if (tp->mpc && mptcp_handle_options(sk, th, skb))
  3208. + goto discard;
  3209. +
  3210. return true;
  3211. discard:
  3212. + if (tp->mpc)
  3213. + mptcp_reset_mopt(tp);
  3214. __kfree_skb(skb);
  3215. return false;
  3216. }
  3217. @@ -5133,6 +5207,10 @@
  3218. tp->rx_opt.saw_tstamp = 0;
  3219. + /* MPTCP: force slowpath. */
  3220. + if (tp->mpc)
  3221. + goto slow_path;
  3222. +
  3223. /* pred_flags is 0xS?10 << 16 + snd_wnd
  3224. * if header_prediction is to be made
  3225. * 'S' will always be tp->tcp_header_len >> 2
  3226. @@ -5347,7 +5425,7 @@
  3227. */
  3228. tp->lsndtime = tcp_time_stamp;
  3229. - tcp_init_buffer_space(sk);
  3230. + tp->init_buffer_space(sk);
  3231. if (sock_flag(sk, SOCK_KEEPOPEN))
  3232. inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
  3233. @@ -5377,7 +5455,7 @@
  3234. /* Get original SYNACK MSS value if user MSS sets mss_clamp */
  3235. tcp_clear_options(&opt);
  3236. opt.user_mss = opt.mss_clamp = 0;
  3237. - tcp_parse_options(synack, &opt, 0, NULL);
  3238. + tcp_parse_options(synack, &opt, NULL, 0, NULL);
  3239. mss = opt.mss_clamp;
  3240. }
  3241. @@ -5412,8 +5490,11 @@
  3242. struct tcp_sock *tp = tcp_sk(sk);
  3243. struct tcp_fastopen_cookie foc = { .len = -1 };
  3244. int saved_clamp = tp->rx_opt.mss_clamp;
  3245. + struct mptcp_options_received mopt;
  3246. + mptcp_init_mp_opt(&mopt);
  3247. - tcp_parse_options(skb, &tp->rx_opt, 0, &foc);
  3248. + tcp_parse_options(skb, &tp->rx_opt,
  3249. + tp->mpc ? &tp->mptcp->rx_opt : &mopt, 0, &foc);
  3250. if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
  3251. tp->rx_opt.rcv_tsecr -= tp->tsoffset;
  3252. @@ -5460,6 +5541,21 @@
  3253. if (!th->syn)
  3254. goto discard_and_undo;
  3255. + if (tp->request_mptcp || tp->mpc) {
  3256. + int ret;
  3257. + ret = mptcp_rcv_synsent_state_process(sk, &sk,
  3258. + skb, &mopt);
  3259. +
  3260. + /* May have changed if we support MPTCP */
  3261. + tp = tcp_sk(sk);
  3262. + icsk = inet_csk(sk);
  3263. +
  3264. + if (ret == 1)
  3265. + goto reset_and_undo;
  3266. + if (ret == 2)
  3267. + goto discard;
  3268. + }
  3269. +
  3270. /* rfc793:
  3271. * "If the SYN bit is on ...
  3272. * are acceptable then ...
  3273. @@ -5472,6 +5568,15 @@
  3274. tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
  3275. tcp_ack(sk, skb, FLAG_SLOWPATH);
  3276. + if (tp->mpc && !is_master_tp(tp)) {
  3277. + /* Timer for repeating the ACK until an answer
  3278. + * arrives. Used only when establishing an additional
  3279. + * subflow within an MPTCP connection.
  3280. + */
  3281. + sk_reset_timer(sk, &tp->mptcp->mptcp_ack_timer,
  3282. + jiffies + icsk->icsk_rto);
  3283. + }
  3284. +
  3285. /* Ok.. it's good. Set up sequence numbers and
  3286. * move to established.
  3287. */
  3288. @@ -5498,6 +5603,11 @@
  3289. tp->tcp_header_len = sizeof(struct tcphdr);
  3290. }
  3291. + if (tp->mpc) {
  3292. + tp->tcp_header_len += MPTCP_SUB_LEN_DSM_ALIGN;
  3293. + tp->advmss -= MPTCP_SUB_LEN_DSM_ALIGN;
  3294. + }
  3295. +
  3296. if (tcp_is_sack(tp) && sysctl_tcp_fack)
  3297. tcp_enable_fack(tp);
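Once the peer has confirmed MPTCP (tp->mpc), every segment on this subflow reserves room for the DSS mapping option, so its aligned length is added to tcp_header_len and taken out of advmss, mirroring the timestamp-option accounting just above. Schematically (MPTCP_SUB_LEN_DSM_ALIGN is defined elsewhere in the patch; this is only a reading of the two added lines):

    /* usable payload per segment ~= path MSS
     *                             - TCPOLEN_TSTAMP_ALIGNED   (if timestamps)
     *                             - MPTCP_SUB_LEN_DSM_ALIGN  (if tp->mpc)
     */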
  3298. @@ -5518,7 +5628,9 @@
  3299. tcp_rcv_fastopen_synack(sk, skb, &foc))
  3300. return -1;
  3301. - if (sk->sk_write_pending ||
  3302. + /* With MPTCP we cannot send data on the third ACK due to the
  3303. + * lack of option space. */
  3304. + if ((sk->sk_write_pending && !tp->mpc) ||
  3305. icsk->icsk_accept_queue.rskq_defer_accept ||
  3306. icsk->icsk_ack.pingpong) {
  3307. /* Save one ACK. Data will be ready after
  3308. @@ -5560,6 +5672,7 @@
  3309. tcp_paws_reject(&tp->rx_opt, 0))
  3310. goto discard_and_undo;
  3311. + /* TODO - check this here for MPTCP */
  3312. if (th->syn) {
  3313. /* We see SYN without ACK. It is attempt of
  3314. * simultaneous connect with crossed SYNs.
  3315. @@ -5576,6 +5689,11 @@
  3316. tp->tcp_header_len = sizeof(struct tcphdr);
  3317. }
  3318. + if (tp->mpc) {
  3319. + tp->tcp_header_len += MPTCP_SUB_LEN_DSM_ALIGN;
  3320. + tp->advmss -= MPTCP_SUB_LEN_DSM_ALIGN;
  3321. + }
  3322. +
  3323. tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
  3324. tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
  3325. @@ -5634,6 +5752,7 @@
  3326. int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
  3327. const struct tcphdr *th, unsigned int len)
  3328. + __releases(&sk->sk_lock.slock)
  3329. {
  3330. struct tcp_sock *tp = tcp_sk(sk);
  3331. struct inet_connection_sock *icsk = inet_csk(sk);
  3332. @@ -5685,6 +5804,10 @@
  3333. case TCP_SYN_SENT:
  3334. queued = tcp_rcv_synsent_state_process(sk, skb, th, len);
  3335. + if (is_meta_sk(sk)) {
  3336. + sk = tcp_sk(sk)->mpcb->master_sk;
  3337. + tp = tcp_sk(sk);
  3338. + }
  3339. if (queued >= 0)
  3340. return queued;
  3341. @@ -5692,6 +5815,8 @@
  3342. tcp_urg(sk, skb, th);
  3343. __kfree_skb(skb);
  3344. tcp_data_snd_check(sk);
  3345. + if (tp->mpc && is_master_tp(tp))
  3346. + bh_unlock_sock(sk);
  3347. return 0;
  3348. }
  3349. @@ -5734,7 +5859,7 @@
  3350. tcp_mtup_init(sk);
  3351. tp->copied_seq = tp->rcv_nxt;
  3352. - tcp_init_buffer_space(sk);
  3353. + tp->init_buffer_space(sk);
  3354. }
  3355. smp_mb();
  3356. tcp_set_state(sk, TCP_ESTABLISHED);
  3357. @@ -5754,6 +5879,8 @@
  3358. if (tp->rx_opt.tstamp_ok)
  3359. tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
  3360. + if (tp->mpc)
  3361. + tp->advmss -= MPTCP_SUB_LEN_DSM_ALIGN;
  3362. if (req) {
  3363. /* Re-arm the timer because data may have been sent out.
  3364. @@ -5775,6 +5902,12 @@
  3365. tcp_initialize_rcv_mss(sk);
  3366. tcp_fast_path_on(tp);
  3367. + /* Send an ACK when establishing a new
  3368. + * MPTCP subflow, i.e. using an MP_JOIN
  3369. + * subtype.
  3370. + */
  3371. + if (tp->mpc && !is_master_tp(tp))
  3372. + tcp_send_ack(sk);
  3373. break;
  3374. case TCP_FIN_WAIT1: {
  3375. @@ -5826,7 +5959,8 @@
  3376. tmo = tcp_fin_time(sk);
  3377. if (tmo > TCP_TIMEWAIT_LEN) {
  3378. inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
  3379. - } else if (th->fin || sock_owned_by_user(sk)) {
  3380. + } else if (th->fin || mptcp_is_data_fin(skb) ||
  3381. + sock_owned_by_user(sk)) {
  3382. /* Bad case. We could lose such FIN otherwise.
  3383. * It is not a big problem, but it looks confusing
  3384. * and not so rare event. We still can lose it now,
  3385. @@ -5855,6 +5989,9 @@
  3386. goto discard;
  3387. }
  3388. break;
  3389. + case TCP_CLOSE:
  3390. + if (tp->mp_killed)
  3391. + goto discard;
  3392. }
  3393. /* step 6: check the URG bit */
  3394. @@ -5875,7 +6012,11 @@
  3395. */
  3396. if (sk->sk_shutdown & RCV_SHUTDOWN) {
  3397. if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
  3398. - after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
  3399. + after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt) &&
  3400. + !tp->mpc) {
  3401. + /* In case of mptcp, the reset is handled by
  3402. + * mptcp_rcv_state_process
  3403. + */
  3404. NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
  3405. tcp_reset(sk);
  3406. return 1;
  3407. diff -Nur linux-3.14.45.orig/net/ipv4/tcp_ipv4.c linux-3.14.45/net/ipv4/tcp_ipv4.c
  3408. --- linux-3.14.45.orig/net/ipv4/tcp_ipv4.c 2015-06-23 02:01:36.000000000 +0200
  3409. +++ linux-3.14.45/net/ipv4/tcp_ipv4.c 2015-06-24 14:15:48.883862476 +0200
  3410. @@ -67,6 +67,8 @@
  3411. #include <net/icmp.h>
  3412. #include <net/inet_hashtables.h>
  3413. #include <net/tcp.h>
  3414. +#include <net/mptcp.h>
  3415. +#include <net/mptcp_v4.h>
  3416. #include <net/transp_v6.h>
  3417. #include <net/ipv6.h>
  3418. #include <net/inet_common.h>
  3419. @@ -99,7 +101,7 @@
  3420. struct inet_hashinfo tcp_hashinfo;
  3421. EXPORT_SYMBOL(tcp_hashinfo);
  3422. -static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
  3423. +__u32 tcp_v4_init_sequence(const struct sk_buff *skb)
  3424. {
  3425. return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
  3426. ip_hdr(skb)->saddr,
  3427. @@ -335,7 +337,7 @@
  3428. struct inet_sock *inet;
  3429. const int type = icmp_hdr(icmp_skb)->type;
  3430. const int code = icmp_hdr(icmp_skb)->code;
  3431. - struct sock *sk;
  3432. + struct sock *sk, *meta_sk;
  3433. struct sk_buff *skb;
  3434. struct request_sock *req;
  3435. __u32 seq;
  3436. @@ -359,13 +361,19 @@
  3437. return;
  3438. }
  3439. - bh_lock_sock(sk);
  3440. + tp = tcp_sk(sk);
  3441. + if (tp->mpc)
  3442. + meta_sk = mptcp_meta_sk(sk);
  3443. + else
  3444. + meta_sk = sk;
  3445. +
  3446. + bh_lock_sock(meta_sk);
  3447. /* If too many ICMPs get dropped on busy
  3448. * servers this needs to be solved differently.
  3449. * We do take care of PMTU discovery (RFC1191) special case :
  3450. * we can receive locally generated ICMP messages while socket is held.
  3451. */
  3452. - if (sock_owned_by_user(sk)) {
  3453. + if (sock_owned_by_user(meta_sk)) {
  3454. if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
  3455. NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
  3456. }
  3457. @@ -378,7 +386,6 @@
  3458. }
  3459. icsk = inet_csk(sk);
  3460. - tp = tcp_sk(sk);
  3461. req = tp->fastopen_rsk;
  3462. seq = ntohl(th->seq);
  3463. if (sk->sk_state != TCP_LISTEN &&
  3464. @@ -412,11 +419,13 @@
  3465. goto out;
  3466. tp->mtu_info = info;
  3467. - if (!sock_owned_by_user(sk)) {
  3468. + if (!sock_owned_by_user(meta_sk)) {
  3469. tcp_v4_mtu_reduced(sk);
  3470. } else {
  3471. if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
  3472. sock_hold(sk);
  3473. + if (tp->mpc)
  3474. + mptcp_tsq_flags(sk);
  3475. }
  3476. goto out;
  3477. }
  3478. @@ -432,7 +441,7 @@
  3479. /* XXX (TFO) - revisit the following logic for TFO */
  3480. - if (sock_owned_by_user(sk))
  3481. + if (sock_owned_by_user(meta_sk))
  3482. break;
  3483. icsk->icsk_backoff--;
  3484. @@ -474,7 +483,7 @@
  3485. switch (sk->sk_state) {
  3486. struct request_sock *req, **prev;
  3487. case TCP_LISTEN:
  3488. - if (sock_owned_by_user(sk))
  3489. + if (sock_owned_by_user(meta_sk))
  3490. goto out;
  3491. req = inet_csk_search_req(sk, &prev, th->dest,
  3492. @@ -507,7 +516,7 @@
  3493. It can f.e. if SYNs crossed,
  3494. or Fast Open.
  3495. */
  3496. - if (!sock_owned_by_user(sk)) {
  3497. + if (!sock_owned_by_user(meta_sk)) {
  3498. sk->sk_err = err;
  3499. sk->sk_error_report(sk);
  3500. @@ -536,7 +545,7 @@
  3501. */
  3502. inet = inet_sk(sk);
  3503. - if (!sock_owned_by_user(sk) && inet->recverr) {
  3504. + if (!sock_owned_by_user(meta_sk) && inet->recverr) {
  3505. sk->sk_err = err;
  3506. sk->sk_error_report(sk);
  3507. } else { /* Only an error on timeout */
  3508. @@ -544,7 +553,7 @@
  3509. }
  3510. out:
  3511. - bh_unlock_sock(sk);
  3512. + bh_unlock_sock(meta_sk);
  3513. sock_put(sk);
  3514. }
  3515. @@ -586,7 +595,7 @@
  3516. * Exception: precedence violation. We do not implement it in any case.
  3517. */
  3518. -static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
  3519. +void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
  3520. {
  3521. const struct tcphdr *th = tcp_hdr(skb);
  3522. struct {
  3523. @@ -711,10 +720,10 @@
  3524. outside socket context is ugly, certainly. What can I do?
  3525. */
  3526. -static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
  3527. +static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 data_ack,
  3528. u32 win, u32 tsval, u32 tsecr, int oif,
  3529. struct tcp_md5sig_key *key,
  3530. - int reply_flags, u8 tos)
  3531. + int reply_flags, u8 tos, int mptcp)
  3532. {
  3533. const struct tcphdr *th = tcp_hdr(skb);
  3534. struct {
  3535. @@ -723,6 +732,10 @@
  3536. #ifdef CONFIG_TCP_MD5SIG
  3537. + (TCPOLEN_MD5SIG_ALIGNED >> 2)
  3538. #endif
  3539. +#ifdef CONFIG_MPTCP
  3540. + + ((MPTCP_SUB_LEN_DSS >> 2) +
  3541. + (MPTCP_SUB_LEN_ACK >> 2))
  3542. +#endif
  3543. ];
  3544. } rep;
  3545. struct ip_reply_arg arg;
  3546. @@ -767,6 +780,21 @@
  3547. ip_hdr(skb)->daddr, &rep.th);
  3548. }
  3549. #endif
  3550. +#ifdef CONFIG_MPTCP
  3551. + if (mptcp) {
  3552. + int offset = (tsecr) ? 3 : 0;
  3553. + /* Construction of 32-bit data_ack */
  3554. + rep.opt[offset++] = htonl((TCPOPT_MPTCP << 24) |
  3555. + ((MPTCP_SUB_LEN_DSS + MPTCP_SUB_LEN_ACK) << 16) |
  3556. + (0x20 << 8) |
  3557. + (0x01));
  3558. + rep.opt[offset] = htonl(data_ack);
  3559. +
  3560. + arg.iov[0].iov_len += MPTCP_SUB_LEN_DSS + MPTCP_SUB_LEN_ACK;
  3561. + rep.th.doff = arg.iov[0].iov_len / 4;
  3562. + }
  3563. +#endif /* CONFIG_MPTCP */
  3564. +
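The 32-bit option word built above can be read byte by byte as follows. The kind and length bytes come straight from the expression; the meaning of the 0x20 and 0x01 bits follows the MPTCP DSS option layout and is an interpretation, not something spelled out in this hunk:

    /* htonl((TCPOPT_MPTCP << 24) | (len << 16) | (0x20 << 8) | 0x01)
     *
     *  byte 0: TCPOPT_MPTCP                           option kind
     *  byte 1: MPTCP_SUB_LEN_DSS + MPTCP_SUB_LEN_ACK  option length
     *  byte 2: 0x20 -> DSS subtype (0x2) in the upper nibble
     *  byte 3: 0x01 -> 'A' flag: a 32-bit data ACK follows
     *
     * The next word, rep.opt[offset], carries that data ACK; in the
     * time-wait path below it is tcptw->mptcp_tw->rcv_nxt truncated
     * to 32 bits.
     */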
  3565. arg.flags = reply_flags;
  3566. arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
  3567. ip_hdr(skb)->saddr, /* XXX */
  3568. @@ -786,36 +814,44 @@
  3569. {
  3570. struct inet_timewait_sock *tw = inet_twsk(sk);
  3571. struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
  3572. + u32 data_ack = 0;
  3573. + int mptcp = 0;
  3574. +
  3575. + if (tcptw->mptcp_tw && tcptw->mptcp_tw->meta_tw) {
  3576. + data_ack = (u32)tcptw->mptcp_tw->rcv_nxt;
  3577. + mptcp = 1;
  3578. + }
  3579. tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
  3580. + data_ack,
  3581. tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
  3582. tcp_time_stamp + tcptw->tw_ts_offset,
  3583. tcptw->tw_ts_recent,
  3584. tw->tw_bound_dev_if,
  3585. tcp_twsk_md5_key(tcptw),
  3586. tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
  3587. - tw->tw_tos
  3588. + tw->tw_tos, mptcp
  3589. );
  3590. inet_twsk_put(tw);
  3591. }
  3592. -static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
  3593. - struct request_sock *req)
  3594. +void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
  3595. + struct request_sock *req)
  3596. {
  3597. /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
  3598. * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
  3599. */
  3600. tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
  3601. tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
  3602. - tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
  3603. + tcp_rsk(req)->rcv_nxt, 0, req->rcv_wnd,
  3604. tcp_time_stamp,
  3605. req->ts_recent,
  3606. 0,
  3607. tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
  3608. AF_INET),
  3609. inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
  3610. - ip_hdr(skb)->tos);
  3611. + ip_hdr(skb)->tos, 0);
  3612. }
  3613. /*
  3614. @@ -823,9 +859,9 @@
  3615. * This still operates on a request_sock only, not on a big
  3616. * socket.
  3617. */
  3618. -static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
  3619. - struct request_sock *req,
  3620. - u16 queue_mapping)
  3621. +int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
  3622. + struct request_sock *req,
  3623. + u16 queue_mapping)
  3624. {
  3625. const struct inet_request_sock *ireq = inet_rsk(req);
  3626. struct flowi4 fl4;
  3627. @@ -853,7 +889,7 @@
  3628. return err;
  3629. }
  3630. -static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
  3631. +int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
  3632. {
  3633. int res = tcp_v4_send_synack(sk, NULL, req, 0);
  3634. @@ -865,7 +901,7 @@
  3635. /*
  3636. * IPv4 request_sock destructor.
  3637. */
  3638. -static void tcp_v4_reqsk_destructor(struct request_sock *req)
  3639. +void tcp_v4_reqsk_destructor(struct request_sock *req)
  3640. {
  3641. kfree(inet_rsk(req)->opt);
  3642. }
  3643. @@ -905,7 +941,7 @@
  3644. /*
  3645. * Save and compile IPv4 options into the request_sock if needed.
  3646. */
  3647. -static struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb)
  3648. +struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb)
  3649. {
  3650. const struct ip_options *opt = &(IPCB(skb)->opt);
  3651. struct ip_options_rcu *dopt = NULL;
  3652. @@ -1257,7 +1293,7 @@
  3653. };
  3654. #ifdef CONFIG_TCP_MD5SIG
  3655. -static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
  3656. +const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
  3657. .md5_lookup = tcp_v4_reqsk_md5_lookup,
  3658. .calc_md5_hash = tcp_v4_md5_hash_skb,
  3659. };
  3660. @@ -1415,7 +1451,7 @@
  3661. tcp_init_congestion_control(child);
  3662. tcp_mtup_init(child);
  3663. tcp_init_metrics(child);
  3664. - tcp_init_buffer_space(child);
  3665. + tp->init_buffer_space(child);
  3666. /* Queue the data carried in the SYN packet. We need to first
  3667. * bump skb's refcnt because the caller will attempt to free it.
  3668. @@ -1447,6 +1483,7 @@
  3669. int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
  3670. {
  3671. struct tcp_options_received tmp_opt;
  3672. + struct mptcp_options_received mopt;
  3673. struct request_sock *req;
  3674. struct inet_request_sock *ireq;
  3675. struct tcp_sock *tp = tcp_sk(sk);
  3676. @@ -1461,6 +1498,22 @@
  3677. struct sk_buff *skb_synack;
  3678. int do_fastopen;
  3679. + tcp_clear_options(&tmp_opt);
  3680. + tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
  3681. + tmp_opt.user_mss = tp->rx_opt.user_mss;
  3682. + mptcp_init_mp_opt(&mopt);
  3683. + tcp_parse_options(skb, &tmp_opt, &mopt, 0, want_cookie ? NULL : &foc);
  3684. +
  3685. +#ifdef CONFIG_MPTCP
  3686. + /* MPTCP structures not initialized, so clear MPTCP fields */
  3687. + if (mptcp_init_failed)
  3688. + mptcp_init_mp_opt(&mopt);
  3689. +
  3690. + if (mopt.is_mp_join)
  3691. + return mptcp_do_join_short(skb, &mopt, &tmp_opt, sock_net(sk));
  3692. + if (mopt.drop_me)
  3693. + goto drop;
  3694. +#endif
  3695. /* Never answer to SYNs send to broadcast or multicast */
  3696. if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
  3697. goto drop;
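Hoisting the option parsing to the top of tcp_v4_conn_request() lets the MP_JOIN short path and the drop_me check run before anything else, and lets the request sock further down be allocated from mptcp_request_sock_ops when MP_CAPABLE was seen; the original parse site is removed again in a later hunk of this file. The rewritten entry, paraphrased from these hunks rather than quoted:

    /*
     *   parse TCP + MPTCP options (tmp_opt, mopt)
     *   if (mptcp_init_failed)   -> clear the MPTCP options again
     *   if (mopt.is_mp_join)     -> mptcp_do_join_short() and return
     *   if (mopt.drop_me)        -> goto drop
     *   ... existing broadcast/multicast and accept-queue checks ...
     *   req = (mopt.saw_mpc && !want_cookie)
     *           ? inet_reqsk_alloc(&mptcp_request_sock_ops)
     *           : inet_reqsk_alloc(&tcp_request_sock_ops);
     */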
  3698. @@ -1486,7 +1539,22 @@
  3699. goto drop;
  3700. }
  3701. - req = inet_reqsk_alloc(&tcp_request_sock_ops);
  3702. +#ifdef CONFIG_MPTCP
  3703. + if (sysctl_mptcp_enabled == MPTCP_APP && !tp->mptcp_enabled)
  3704. + mopt.saw_mpc = 0;
  3705. + if (mopt.saw_mpc && !want_cookie) {
  3706. + req = inet_reqsk_alloc(&mptcp_request_sock_ops);
  3707. +
  3708. + if (!req)
  3709. + goto drop;
  3710. +
  3711. + mptcp_rsk(req)->mpcb = NULL;
  3712. + mptcp_rsk(req)->dss_csum = mopt.dss_csum;
  3713. + mptcp_rsk(req)->collide_tk.pprev = NULL;
  3714. + } else
  3715. +#endif
  3716. + req = inet_reqsk_alloc(&tcp_request_sock_ops);
  3717. +
  3718. if (!req)
  3719. goto drop;
  3720. @@ -1494,17 +1562,15 @@
  3721. tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
  3722. #endif
  3723. - tcp_clear_options(&tmp_opt);
  3724. - tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
  3725. - tmp_opt.user_mss = tp->rx_opt.user_mss;
  3726. - tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
  3727. -
  3728. if (want_cookie && !tmp_opt.saw_tstamp)
  3729. tcp_clear_options(&tmp_opt);
  3730. tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
  3731. tcp_openreq_init(req, &tmp_opt, skb);
  3732. + if (mopt.saw_mpc && !want_cookie)
  3733. + mptcp_reqsk_new_mptcp(req, &tmp_opt, &mopt, skb);
  3734. +
  3735. ireq = inet_rsk(req);
  3736. ireq->ir_loc_addr = daddr;
  3737. ireq->ir_rmt_addr = saddr;
  3738. @@ -1716,7 +1782,7 @@
  3739. }
  3740. EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
  3741. -static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
  3742. +struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
  3743. {
  3744. struct tcphdr *th = tcp_hdr(skb);
  3745. const struct iphdr *iph = ip_hdr(skb);
  3746. @@ -1733,8 +1799,15 @@
  3747. if (nsk) {
  3748. if (nsk->sk_state != TCP_TIME_WAIT) {
  3749. + /* Don't lock the meta-sk again. It has been locked
  3750. + * before mptcp_v4_do_rcv.
  3751. + */
  3752. + if (tcp_sk(nsk)->mpc && !is_meta_sk(sk))
  3753. + bh_lock_sock(mptcp_meta_sk(nsk));
  3754. bh_lock_sock(nsk);
  3755. +
  3756. return nsk;
  3757. +
  3758. }
  3759. inet_twsk_put(inet_twsk(nsk));
  3760. return NULL;
  3761. @@ -1791,6 +1864,9 @@
  3762. goto discard;
  3763. #endif
  3764. + if (is_meta_sk(sk))
  3765. + return mptcp_v4_do_rcv(sk, skb);
  3766. +
  3767. if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
  3768. struct dst_entry *dst = sk->sk_rx_dst;
  3769. @@ -1922,7 +1998,7 @@
  3770. } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
  3771. wake_up_interruptible_sync_poll(sk_sleep(sk),
  3772. POLLIN | POLLRDNORM | POLLRDBAND);
  3773. - if (!inet_csk_ack_scheduled(sk))
  3774. + if (!inet_csk_ack_scheduled(sk) && !tp->mpc)
  3775. inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
  3776. (3 * tcp_rto_min(sk)) / 4,
  3777. TCP_RTO_MAX);
  3778. @@ -1939,7 +2015,7 @@
  3779. {
  3780. const struct iphdr *iph;
  3781. const struct tcphdr *th;
  3782. - struct sock *sk;
  3783. + struct sock *sk, *meta_sk = NULL;
  3784. int ret;
  3785. struct net *net = dev_net(skb->dev);
  3786. @@ -1972,18 +2048,42 @@
  3787. TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
  3788. skb->len - th->doff * 4);
  3789. TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
  3790. +#ifdef CONFIG_MPTCP
  3791. + TCP_SKB_CB(skb)->mptcp_flags = 0;
  3792. + TCP_SKB_CB(skb)->dss_off = 0;
  3793. +#endif
  3794. TCP_SKB_CB(skb)->when = 0;
  3795. TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
  3796. TCP_SKB_CB(skb)->sacked = 0;
  3797. sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
  3798. - if (!sk)
  3799. - goto no_tcp_socket;
  3800. process:
  3801. - if (sk->sk_state == TCP_TIME_WAIT)
  3802. + if (sk && sk->sk_state == TCP_TIME_WAIT)
  3803. goto do_time_wait;
  3804. +#ifdef CONFIG_MPTCP
  3805. + if (!sk && th->syn && !th->ack) {
  3806. + int ret = mptcp_lookup_join(skb, NULL);
  3807. +
  3808. + if (ret < 0) {
  3809. + tcp_v4_send_reset(NULL, skb);
  3810. + goto discard_it;
  3811. + } else if (ret > 0) {
  3812. + return 0;
  3813. + }
  3814. + }
  3815. +
  3816. + /* Is there a pending request sock for this segment ? */
  3817. + if ((!sk || sk->sk_state == TCP_LISTEN) && mptcp_check_req(skb, net)) {
  3818. + if (sk)
  3819. + sock_put(sk);
  3820. + return 0;
  3821. + }
  3822. +#endif
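The return value of mptcp_lookup_join() is used as a three-way dispatch here (read directly from the added code; the helper itself is defined elsewhere in the patch):

    /*
     *   ret < 0  -> no matching MPTCP token: answer with a RST, drop the skb
     *   ret > 0  -> the segment was consumed by MPTCP: return 0
     *   ret == 0 -> not MPTCP-related: fall through, so a SYN that matches
     *               no socket still ends up at no_tcp_socket below
     */

The same pattern is repeated for the TIME_WAIT case later in this file, and mptcp_check_req() likewise returns non-zero when it has taken ownership of the segment.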
  3823. + if (!sk)
  3824. + goto no_tcp_socket;
  3825. +
  3826. if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
  3827. NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
  3828. goto discard_and_relse;
  3829. @@ -1999,11 +2099,21 @@
  3830. sk_mark_napi_id(sk, skb);
  3831. skb->dev = NULL;
  3832. - bh_lock_sock_nested(sk);
  3833. + if (tcp_sk(sk)->mpc) {
  3834. + meta_sk = mptcp_meta_sk(sk);
  3835. +
  3836. + bh_lock_sock_nested(meta_sk);
  3837. + if (sock_owned_by_user(meta_sk))
  3838. + skb->sk = sk;
  3839. + } else {
  3840. + meta_sk = sk;
  3841. + bh_lock_sock_nested(sk);
  3842. + }
  3843. +
  3844. ret = 0;
  3845. - if (!sock_owned_by_user(sk)) {
  3846. + if (!sock_owned_by_user(meta_sk)) {
  3847. #ifdef CONFIG_NET_DMA
  3848. - struct tcp_sock *tp = tcp_sk(sk);
  3849. + struct tcp_sock *tp = tcp_sk(meta_sk);
  3850. if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
  3851. tp->ucopy.dma_chan = net_dma_find_channel();
  3852. if (tp->ucopy.dma_chan)
  3853. @@ -2011,16 +2121,16 @@
  3854. else
  3855. #endif
  3856. {
  3857. - if (!tcp_prequeue(sk, skb))
  3858. + if (!tcp_prequeue(meta_sk, skb))
  3859. ret = tcp_v4_do_rcv(sk, skb);
  3860. }
  3861. - } else if (unlikely(sk_add_backlog(sk, skb,
  3862. - sk->sk_rcvbuf + sk->sk_sndbuf))) {
  3863. - bh_unlock_sock(sk);
  3864. + } else if (unlikely(sk_add_backlog(meta_sk, skb,
  3865. + meta_sk->sk_rcvbuf + meta_sk->sk_sndbuf))) {
  3866. + bh_unlock_sock(meta_sk);
  3867. NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
  3868. goto discard_and_relse;
  3869. }
  3870. - bh_unlock_sock(sk);
  3871. + bh_unlock_sock(meta_sk);
  3872. sock_put(sk);
  3873. @@ -2075,6 +2185,18 @@
  3874. sk = sk2;
  3875. goto process;
  3876. }
  3877. +#ifdef CONFIG_MPTCP
  3878. + if (th->syn && !th->ack) {
  3879. + int ret = mptcp_lookup_join(skb, inet_twsk(sk));
  3880. +
  3881. + if (ret < 0) {
  3882. + tcp_v4_send_reset(NULL, skb);
  3883. + goto discard_it;
  3884. + } else if (ret > 0) {
  3885. + return 0;
  3886. + }
  3887. + }
  3888. +#endif
  3889. /* Fall through to ACK */
  3890. }
  3891. case TCP_TW_ACK:
  3892. @@ -2158,6 +2280,11 @@
  3893. tcp_cleanup_congestion_control(sk);
  3894. + if (tp->mpc)
  3895. + mptcp_destroy_sock(sk);
  3896. + if (tp->inside_tk_table)
  3897. + mptcp_hash_remove(tp);
  3898. +
  3899. /* Cleanup up the write buffer. */
  3900. tcp_write_queue_purge(sk);
  3901. diff -Nur linux-3.14.45.orig/net/ipv4/tcp_minisocks.c linux-3.14.45/net/ipv4/tcp_minisocks.c
  3902. --- linux-3.14.45.orig/net/ipv4/tcp_minisocks.c 2015-06-23 02:01:36.000000000 +0200
  3903. +++ linux-3.14.45/net/ipv4/tcp_minisocks.c 2015-06-24 14:15:48.887862480 +0200
  3904. @@ -18,11 +18,13 @@
  3905. * Jorge Cwik, <jorge@laser.satlink.net>
  3906. */
  3907. +#include <linux/kconfig.h>
  3908. #include <linux/mm.h>
  3909. #include <linux/module.h>
  3910. #include <linux/slab.h>
  3911. #include <linux/sysctl.h>
  3912. #include <linux/workqueue.h>
  3913. +#include <net/mptcp.h>
  3914. #include <net/tcp.h>
  3915. #include <net/inet_common.h>
  3916. #include <net/xfrm.h>
  3917. @@ -95,10 +97,13 @@
  3918. struct tcp_options_received tmp_opt;
  3919. struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
  3920. bool paws_reject = false;
  3921. + struct mptcp_options_received mopt;
  3922. tmp_opt.saw_tstamp = 0;
  3923. if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
  3924. - tcp_parse_options(skb, &tmp_opt, 0, NULL);
  3925. + mptcp_init_mp_opt(&mopt);
  3926. +
  3927. + tcp_parse_options(skb, &tmp_opt, &mopt, 0, NULL);
  3928. if (tmp_opt.saw_tstamp) {
  3929. tmp_opt.rcv_tsecr -= tcptw->tw_ts_offset;
  3930. @@ -106,6 +111,11 @@
  3931. tmp_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
  3932. paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
  3933. }
  3934. +
  3935. + if (unlikely(mopt.mp_fclose) && tcptw->mptcp_tw) {
  3936. + if (mopt.mptcp_key == tcptw->mptcp_tw->loc_key)
  3937. + goto kill_with_rst;
  3938. + }
  3939. }
  3940. if (tw->tw_substate == TCP_FIN_WAIT2) {
  3941. @@ -128,6 +138,16 @@
  3942. if (!th->ack ||
  3943. !after(TCP_SKB_CB(skb)->end_seq, tcptw->tw_rcv_nxt) ||
  3944. TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) {
  3945. + /* If mptcp_is_data_fin() returns true, we are sure that
  3946. + * mopt has been initialized - otherwise it would not
  3947. + * be a DATA_FIN.
  3948. + */
  3949. + if (tcptw->mptcp_tw && tcptw->mptcp_tw->meta_tw &&
  3950. + mptcp_is_data_fin(skb) &&
  3951. + TCP_SKB_CB(skb)->seq == tcptw->tw_rcv_nxt &&
  3952. + mopt.data_seq + 1 == (u32)tcptw->mptcp_tw->rcv_nxt)
  3953. + return TCP_TW_ACK;
  3954. +
  3955. inet_twsk_put(tw);
  3956. return TCP_TW_SUCCESS;
  3957. }
  3958. @@ -270,6 +290,11 @@
  3959. const struct tcp_sock *tp = tcp_sk(sk);
  3960. bool recycle_ok = false;
  3961. + if (is_meta_sk(sk)) {
  3962. + mptcp_update_tw_socks(tp, state);
  3963. + goto tcp_done;
  3964. + }
  3965. +
  3966. if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp)
  3967. recycle_ok = tcp_remember_stamp(sk);
  3968. @@ -290,6 +315,15 @@
  3969. tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
  3970. tcptw->tw_ts_offset = tp->tsoffset;
  3971. + if (tp->mpc) {
  3972. + if (mptcp_time_wait(sk, tcptw)) {
  3973. + inet_twsk_free(tw);
  3974. + goto exit;
  3975. + }
  3976. + } else {
  3977. + tcptw->mptcp_tw = NULL;
  3978. + }
  3979. +
  3980. #if IS_ENABLED(CONFIG_IPV6)
  3981. if (tw->tw_family == PF_INET6) {
  3982. struct ipv6_pinfo *np = inet6_sk(sk);
  3983. @@ -347,15 +381,19 @@
  3984. NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPTIMEWAITOVERFLOW);
  3985. }
  3986. +exit:
  3987. tcp_update_metrics(sk);
  3988. +tcp_done:
  3989. tcp_done(sk);
  3990. }
  3991. void tcp_twsk_destructor(struct sock *sk)
  3992. {
  3993. -#ifdef CONFIG_TCP_MD5SIG
  3994. struct tcp_timewait_sock *twsk = tcp_twsk(sk);
  3995. + if (twsk->mptcp_tw)
  3996. + mptcp_twsk_destructor(twsk);
  3997. +#ifdef CONFIG_TCP_MD5SIG
  3998. if (twsk->tw_md5_key)
  3999. kfree_rcu(twsk->tw_md5_key, rcu);
  4000. #endif
  4001. @@ -392,6 +430,9 @@
  4002. newtp->snd_sml = newtp->snd_una =
  4003. newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
  4004. +#ifdef CONFIG_MPTCP
  4005. + memset(&newtp->rcvq_space, 0, sizeof(newtp->rcvq_space));
  4006. +#endif
  4007. tcp_prequeue_init(newtp);
  4008. INIT_LIST_HEAD(&newtp->tsq_node);
  4009. @@ -436,7 +477,11 @@
  4010. newtp->urg_data = 0;
  4011. - if (sock_flag(newsk, SOCK_KEEPOPEN))
  4012. + /* MPTCP: If we are creating a subflow, KEEPOPEN might have been
4013. + * set on the meta. But keepalive is handled entirely at the
4014. + * meta-socket, so let's keep it there.
  4015. + */
  4016. + if (sock_flag(newsk, SOCK_KEEPOPEN) && is_meta_sk(sk))
  4017. inet_csk_reset_keepalive_timer(newsk,
  4018. keepalive_time_when(newtp));
  4019. @@ -468,6 +513,8 @@
  4020. newtp->rx_opt.ts_recent_stamp = 0;
  4021. newtp->tcp_header_len = sizeof(struct tcphdr);
  4022. }
  4023. + if (treq->saw_mpc)
  4024. + newtp->tcp_header_len += MPTCP_SUB_LEN_DSM_ALIGN;
  4025. newtp->tsoffset = 0;
  4026. #ifdef CONFIG_TCP_MD5SIG
  4027. newtp->md5sig_info = NULL; /*XXX*/
  4028. @@ -504,16 +551,20 @@
  4029. bool fastopen)
  4030. {
  4031. struct tcp_options_received tmp_opt;
  4032. + struct mptcp_options_received mopt;
  4033. struct sock *child;
  4034. const struct tcphdr *th = tcp_hdr(skb);
  4035. __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
  4036. bool paws_reject = false;
  4037. - BUG_ON(fastopen == (sk->sk_state == TCP_LISTEN));
  4038. + BUG_ON(!tcp_sk(sk)->mpc && fastopen == (sk->sk_state == TCP_LISTEN));
  4039. tmp_opt.saw_tstamp = 0;
  4040. +
  4041. + mptcp_init_mp_opt(&mopt);
  4042. +
  4043. if (th->doff > (sizeof(struct tcphdr)>>2)) {
  4044. - tcp_parse_options(skb, &tmp_opt, 0, NULL);
  4045. + tcp_parse_options(skb, &tmp_opt, &mopt, 0, NULL);
  4046. if (tmp_opt.saw_tstamp) {
  4047. tmp_opt.ts_recent = req->ts_recent;
  4048. @@ -552,7 +603,14 @@
  4049. *
  4050. * Reset timer after retransmitting SYNACK, similar to
  4051. * the idea of fast retransmit in recovery.
  4052. + *
  4053. + * Fall back to TCP if MP_CAPABLE is not set.
  4054. */
  4055. +
  4056. + if (tcp_rsk(req)->saw_mpc && !mopt.saw_mpc)
  4057. + tcp_rsk(req)->saw_mpc = false;
  4058. +
  4059. +
  4060. if (!inet_rtx_syn_ack(sk, req))
  4061. req->expires = min(TCP_TIMEOUT_INIT << req->num_timeout,
  4062. TCP_RTO_MAX) + jiffies;
  4063. @@ -674,7 +732,20 @@
  4064. /* While TCP_DEFER_ACCEPT is active, drop bare ACK. */
  4065. if (req->num_timeout < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
  4066. - TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
  4067. + TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1 &&
  4068. + /* TODO MPTCP:
4069. + * We do this here, because otherwise options sent in the third ACK
4070. + * or in a duplicate fourth ACK would get lost - options like MP_PRIO, ADD_ADDR, ...
  4071. + *
  4072. + * We could store them in request_sock, but this would mean that we
  4073. + * have to put tcp_options_received and mptcp_options_received in there,
  4074. + * increasing considerably the size of the request-sock.
  4075. + *
  4076. + * As soon as we have reworked the request-sock MPTCP-fields and
4077. + * created an mptcp_request_sock structure, we can handle options
4078. + * correctly there without increasing request_sock.
  4079. + */
  4080. + !tcp_rsk(req)->saw_mpc) {
  4081. inet_rsk(req)->acked = 1;
  4082. NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP);
  4083. return NULL;
  4084. @@ -686,10 +757,29 @@
  4085. * ESTABLISHED STATE. If it will be dropped after
  4086. * socket is created, wait for troubles.
  4087. */
  4088. - child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
  4089. +#ifdef CONFIG_MPTCP
  4090. + if (tcp_sk(sk)->mpc)
  4091. + /* MPTCP: We call the mptcp-specific syn_recv_sock */
  4092. + child = tcp_sk(sk)->mpcb->syn_recv_sock(sk, skb, req, NULL);
  4093. + else
  4094. +#endif
  4095. + child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb,
  4096. + req, NULL);
  4097. +
  4098. if (child == NULL)
  4099. goto listen_overflow;
  4100. + if (!is_meta_sk(sk)) {
  4101. + int ret = mptcp_check_req_master(sk, child, req, prev, &mopt);
  4102. + if (ret < 0)
  4103. + goto listen_overflow;
  4104. +
  4105. + /* MPTCP-supported */
  4106. + if (!ret)
  4107. + return tcp_sk(child)->mpcb->master_sk;
  4108. + } else {
  4109. + return mptcp_check_req_child(sk, child, req, prev, &mopt);
  4110. + }
  4111. inet_csk_reqsk_queue_unlink(sk, req, prev);
  4112. inet_csk_reqsk_queue_removed(sk, req);
  4113. @@ -739,8 +829,9 @@
  4114. {
  4115. int ret = 0;
  4116. int state = child->sk_state;
  4117. + struct sock *meta_sk = tcp_sk(child)->mpc ? mptcp_meta_sk(child) : child;
  4118. - if (!sock_owned_by_user(child)) {
  4119. + if (!sock_owned_by_user(meta_sk)) {
  4120. ret = tcp_rcv_state_process(child, skb, tcp_hdr(skb),
  4121. skb->len);
  4122. /* Wakeup parent, send SIGIO */
  4123. @@ -751,10 +842,14 @@
  4124. * in main socket hash table and lock on listening
  4125. * socket does not protect us more.
  4126. */
  4127. - __sk_add_backlog(child, skb);
  4128. + if (tcp_sk(child)->mpc)
  4129. + skb->sk = child;
  4130. + __sk_add_backlog(meta_sk, skb);
  4131. }
  4132. - bh_unlock_sock(child);
  4133. + if (tcp_sk(child)->mpc)
  4134. + bh_unlock_sock(child);
  4135. + bh_unlock_sock(meta_sk);
  4136. sock_put(child);
  4137. return ret;
  4138. }
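/* Editorial aside (not part of the original patch): the tcp_minisocks.c hunks
 * above illustrate the new calling convention for tcp_parse_options(), which
 * now takes a struct mptcp_options_received * that must be cleared with
 * mptcp_init_mp_opt() before parsing (callers that do not care may pass NULL,
 * as the syncookie paths further below do). A minimal sketch of the pattern;
 * the helper name is hypothetical:
 */
static void example_parse_mptcp_options(const struct sk_buff *skb)
{
	struct tcp_options_received tmp_opt;
	struct mptcp_options_received mopt;

	tmp_opt.saw_tstamp = 0;
	mptcp_init_mp_opt(&mopt);	/* clears saw_mpc, mp_fclose, ... */
	tcp_parse_options(skb, &tmp_opt, &mopt, 0, NULL);

	if (mopt.saw_mpc) {
		/* the peer sent MP_CAPABLE */
	}
}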
  4139. diff -Nur linux-3.14.45.orig/net/ipv4/tcp_output.c linux-3.14.45/net/ipv4/tcp_output.c
  4140. --- linux-3.14.45.orig/net/ipv4/tcp_output.c 2015-06-23 02:01:36.000000000 +0200
  4141. +++ linux-3.14.45/net/ipv4/tcp_output.c 2015-06-24 14:15:48.887862480 +0200
  4142. @@ -36,6 +36,12 @@
  4143. #define pr_fmt(fmt) "TCP: " fmt
  4144. +#include <net/mptcp.h>
  4145. +#include <net/mptcp_v4.h>
  4146. +#if IS_ENABLED(CONFIG_IPV6)
  4147. +#include <net/mptcp_v6.h>
  4148. +#endif
  4149. +#include <net/ipv6.h>
  4150. #include <net/tcp.h>
  4151. #include <linux/compiler.h>
  4152. @@ -72,7 +78,7 @@
  4153. int push_one, gfp_t gfp);
  4154. /* Account for new data that has been sent to the network. */
  4155. -static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
  4156. +void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
  4157. {
  4158. struct inet_connection_sock *icsk = inet_csk(sk);
  4159. struct tcp_sock *tp = tcp_sk(sk);
  4160. @@ -211,7 +217,7 @@
  4161. void tcp_select_initial_window(int __space, __u32 mss,
  4162. __u32 *rcv_wnd, __u32 *window_clamp,
  4163. int wscale_ok, __u8 *rcv_wscale,
  4164. - __u32 init_rcv_wnd)
  4165. + __u32 init_rcv_wnd, const struct sock *sk)
  4166. {
  4167. unsigned int space = (__space < 0 ? 0 : __space);
  4168. @@ -266,11 +272,15 @@
  4169. * value can be stuffed directly into th->window for an outgoing
  4170. * frame.
  4171. */
  4172. -static u16 tcp_select_window(struct sock *sk)
  4173. +u16 tcp_select_window(struct sock *sk)
  4174. {
  4175. struct tcp_sock *tp = tcp_sk(sk);
  4176. - u32 cur_win = tcp_receive_window(tp);
  4177. - u32 new_win = __tcp_select_window(sk);
  4178. + /* The window must never shrink at the meta-level. At the subflow we
  4179. + * have to allow this. Otherwise we may announce a window too large
  4180. + * for the current meta-level sk_rcvbuf.
  4181. + */
  4182. + u32 cur_win = tcp_receive_window(tp->mpc ? tcp_sk(mptcp_meta_sk(sk)) : tp);
  4183. + u32 new_win = tp->__select_window(sk);
  4184. /* Never shrink the offered window */
  4185. if (new_win < cur_win) {
  4186. @@ -283,6 +293,7 @@
  4187. */
  4188. new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale);
  4189. }
  4190. +
  4191. tp->rcv_wnd = new_win;
  4192. tp->rcv_wup = tp->rcv_nxt;
  4193. @@ -361,7 +372,7 @@
  4194. /* Constructs common control bits of non-data skb. If SYN/FIN is present,
  4195. * auto increment end seqno.
  4196. */
  4197. -static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
  4198. +void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
  4199. {
  4200. struct skb_shared_info *shinfo = skb_shinfo(skb);
  4201. @@ -381,7 +392,7 @@
  4202. TCP_SKB_CB(skb)->end_seq = seq;
  4203. }
  4204. -static inline bool tcp_urg_mode(const struct tcp_sock *tp)
  4205. +bool tcp_urg_mode(const struct tcp_sock *tp)
  4206. {
  4207. return tp->snd_una != tp->snd_up;
  4208. }
  4209. @@ -391,17 +402,7 @@
  4210. #define OPTION_MD5 (1 << 2)
  4211. #define OPTION_WSCALE (1 << 3)
  4212. #define OPTION_FAST_OPEN_COOKIE (1 << 8)
  4213. -
  4214. -struct tcp_out_options {
  4215. - u16 options; /* bit field of OPTION_* */
  4216. - u16 mss; /* 0 to disable */
  4217. - u8 ws; /* window scale, 0 to disable */
  4218. - u8 num_sack_blocks; /* number of SACK blocks to include */
  4219. - u8 hash_size; /* bytes in hash_location */
  4220. - __u8 *hash_location; /* temporary pointer, overloaded */
  4221. - __u32 tsval, tsecr; /* need to include OPTION_TS */
  4222. - struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */
  4223. -};
  4224. +/* Before adding here - take a look at OPTION_MPTCP in include/net/mptcp.h */
  4225. /* Write previously computed TCP options to the packet.
  4226. *
  4227. @@ -417,7 +418,7 @@
  4228. * (but it may well be that other scenarios fail similarly).
  4229. */
  4230. static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
  4231. - struct tcp_out_options *opts)
  4232. + struct tcp_out_options *opts, struct sk_buff *skb)
  4233. {
  4234. u16 options = opts->options; /* mungable copy */
  4235. @@ -500,6 +501,9 @@
  4236. }
  4237. ptr += (foc->len + 3) >> 2;
  4238. }
  4239. +
  4240. + if (unlikely(OPTION_MPTCP & opts->options))
  4241. + mptcp_options_write(ptr, tp, opts, skb);
  4242. }
  4243. /* Compute TCP options for SYN packets. This is not the final
  4244. @@ -551,6 +555,8 @@
  4245. if (unlikely(!(OPTION_TS & opts->options)))
  4246. remaining -= TCPOLEN_SACKPERM_ALIGNED;
  4247. }
  4248. + if (tp->request_mptcp || tp->mpc)
  4249. + mptcp_syn_options(sk, opts, &remaining);
  4250. if (fastopen && fastopen->cookie.len >= 0) {
  4251. u32 need = TCPOLEN_EXP_FASTOPEN_BASE + fastopen->cookie.len;
  4252. @@ -624,6 +630,9 @@
  4253. }
  4254. }
  4255. + if (tcp_rsk(req)->saw_mpc)
  4256. + mptcp_synack_options(req, opts, &remaining);
  4257. +
  4258. return MAX_TCP_OPTION_SPACE - remaining;
  4259. }
  4260. @@ -657,16 +666,22 @@
  4261. opts->tsecr = tp->rx_opt.ts_recent;
  4262. size += TCPOLEN_TSTAMP_ALIGNED;
  4263. }
  4264. + if (tp->mpc)
  4265. + mptcp_established_options(sk, skb, opts, &size);
  4266. eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
  4267. if (unlikely(eff_sacks)) {
  4268. - const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
  4269. - opts->num_sack_blocks =
  4270. - min_t(unsigned int, eff_sacks,
  4271. - (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
  4272. - TCPOLEN_SACK_PERBLOCK);
  4273. - size += TCPOLEN_SACK_BASE_ALIGNED +
  4274. - opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
4275. + const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
  4276. + if (remaining < TCPOLEN_SACK_BASE_ALIGNED)
  4277. + opts->num_sack_blocks = 0;
  4278. + else
  4279. + opts->num_sack_blocks =
  4280. + min_t(unsigned int, eff_sacks,
  4281. + (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
  4282. + TCPOLEN_SACK_PERBLOCK);
  4283. + if (opts->num_sack_blocks)
  4284. + size += TCPOLEN_SACK_BASE_ALIGNED +
  4285. + opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
  4286. }
  4287. return size;
  4288. @@ -714,7 +729,7 @@
  4289. unsigned long flags;
  4290. struct list_head *q, *n;
  4291. struct tcp_sock *tp;
  4292. - struct sock *sk;
  4293. + struct sock *sk, *meta_sk;
  4294. local_irq_save(flags);
  4295. list_splice_init(&tsq->head, &list);
  4296. @@ -725,15 +740,27 @@
  4297. list_del(&tp->tsq_node);
  4298. sk = (struct sock *)tp;
  4299. - bh_lock_sock(sk);
  4300. + meta_sk = tp->mpc ? mptcp_meta_sk(sk) : sk;
  4301. + bh_lock_sock(meta_sk);
  4302. - if (!sock_owned_by_user(sk)) {
  4303. + if (!sock_owned_by_user(meta_sk)) {
  4304. tcp_tsq_handler(sk);
  4305. + if (tp->mpc)
  4306. + tcp_tsq_handler(meta_sk);
  4307. } else {
  4308. /* defer the work to tcp_release_cb() */
  4309. set_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags);
  4310. +
4311. + /* For MPTCP, we set the tsq-bit on both the meta and the
4312. + * subflow, as we don't know whether the limit was hit
4313. + * inside mptcp_write_xmit or during tcp_write_xmit.
4314. + */
  4315. + if (tp->mpc) {
  4316. + set_bit(TCP_TSQ_DEFERRED, &tcp_sk(meta_sk)->tsq_flags);
  4317. + mptcp_tsq_flags(sk);
  4318. + }
  4319. }
  4320. - bh_unlock_sock(sk);
  4321. + bh_unlock_sock(meta_sk);
  4322. clear_bit(TSQ_QUEUED, &tp->tsq_flags);
  4323. sk_free(sk);
  4324. @@ -743,7 +770,10 @@
  4325. #define TCP_DEFERRED_ALL ((1UL << TCP_TSQ_DEFERRED) | \
  4326. (1UL << TCP_WRITE_TIMER_DEFERRED) | \
  4327. (1UL << TCP_DELACK_TIMER_DEFERRED) | \
  4328. - (1UL << TCP_MTU_REDUCED_DEFERRED))
  4329. + (1UL << TCP_MTU_REDUCED_DEFERRED) | \
  4330. + (1UL << MPTCP_PATH_MANAGER) | \
  4331. + (1UL << MPTCP_SUB_DEFERRED))
  4332. +
  4333. /**
  4334. * tcp_release_cb - tcp release_sock() callback
  4335. * @sk: socket
  4336. @@ -790,6 +820,13 @@
  4337. inet_csk(sk)->icsk_af_ops->mtu_reduced(sk);
  4338. __sock_put(sk);
  4339. }
  4340. + if (flags & (1UL << MPTCP_PATH_MANAGER)) {
  4341. + if (tcp_sk(sk)->mpcb->pm_ops->release_sock)
  4342. + tcp_sk(sk)->mpcb->pm_ops->release_sock(sk);
  4343. + __sock_put(sk);
  4344. + }
  4345. + if (flags & (1UL << MPTCP_SUB_DEFERRED))
  4346. + mptcp_tsq_sub_deferred(sk);
  4347. }
  4348. EXPORT_SYMBOL(tcp_release_cb);
  4349. @@ -849,8 +886,8 @@
  4350. * We are working here with either a clone of the original
  4351. * SKB, or a fresh unique copy made by the retransmit engine.
  4352. */
  4353. -static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
  4354. - gfp_t gfp_mask)
  4355. +int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
  4356. + gfp_t gfp_mask)
  4357. {
  4358. const struct inet_connection_sock *icsk = inet_csk(sk);
  4359. struct inet_sock *inet;
  4360. @@ -878,10 +915,28 @@
  4361. NET_INC_STATS(sock_net(sk),
  4362. LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
  4363. - if (unlikely(skb_cloned(skb)))
  4364. - skb = pskb_copy(skb, gfp_mask);
  4365. - else
  4366. + if (unlikely(skb_cloned(skb))) {
  4367. + struct sk_buff *newskb;
  4368. + if (mptcp_is_data_seq(skb))
  4369. + skb_push(skb, MPTCP_SUB_LEN_DSS_ALIGN +
  4370. + MPTCP_SUB_LEN_ACK_ALIGN +
  4371. + MPTCP_SUB_LEN_SEQ_ALIGN);
  4372. +
  4373. + newskb = pskb_copy(skb, gfp_mask);
  4374. +
  4375. + if (mptcp_is_data_seq(skb)) {
  4376. + skb_pull(skb, MPTCP_SUB_LEN_DSS_ALIGN +
  4377. + MPTCP_SUB_LEN_ACK_ALIGN +
  4378. + MPTCP_SUB_LEN_SEQ_ALIGN);
  4379. + if (newskb)
  4380. + skb_pull(newskb, MPTCP_SUB_LEN_DSS_ALIGN +
  4381. + MPTCP_SUB_LEN_ACK_ALIGN +
  4382. + MPTCP_SUB_LEN_SEQ_ALIGN);
  4383. + }
  4384. + skb = newskb;
  4385. + } else {
  4386. skb = skb_clone(skb, gfp_mask);
  4387. + }
  4388. if (unlikely(!skb))
  4389. return -ENOBUFS;
  4390. }
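/* Editorial aside (not part of the original patch): for skbs that already
 * carry a DSS mapping (mptcp_is_data_seq()), the DSS/ACK/SEQ option words sit
 * in the headroom just in front of skb->data. pskb_copy() only duplicates the
 * region starting at skb->data, so the hunk above skb_push()es the option
 * space first to make it part of the copied data, then skb_pull()s both the
 * original and the copy back to the payload. The retransmit path further
 * below repeats the same dance around __pskb_copy().
 */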
  4391. @@ -929,7 +984,7 @@
  4392. */
  4393. th->window = htons(min(tp->rcv_wnd, 65535U));
  4394. } else {
  4395. - th->window = htons(tcp_select_window(sk));
  4396. + th->window = htons(tp->select_window(sk));
  4397. }
  4398. th->check = 0;
  4399. th->urg_ptr = 0;
  4400. @@ -945,7 +1000,7 @@
  4401. }
  4402. }
  4403. - tcp_options_write((__be32 *)(th + 1), tp, &opts);
  4404. + tcp_options_write((__be32 *)(th + 1), tp, &opts, skb);
  4405. if (likely((tcb->tcp_flags & TCPHDR_SYN) == 0))
  4406. TCP_ECN_send(sk, skb, tcp_header_size);
  4407. @@ -984,7 +1039,7 @@
  4408. * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames,
  4409. * otherwise socket can stall.
  4410. */
  4411. -static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
  4412. +void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
  4413. {
  4414. struct tcp_sock *tp = tcp_sk(sk);
  4415. @@ -997,15 +1052,16 @@
  4416. }
  4417. /* Initialize TSO segments for a packet. */
  4418. -static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
  4419. - unsigned int mss_now)
  4420. +void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
  4421. + unsigned int mss_now)
  4422. {
  4423. struct skb_shared_info *shinfo = skb_shinfo(skb);
  4424. /* Make sure we own this skb before messing gso_size/gso_segs */
  4425. WARN_ON_ONCE(skb_cloned(skb));
  4426. - if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) {
  4427. + if (skb->len <= mss_now || (is_meta_sk(sk) && !mptcp_sk_can_gso(sk)) ||
  4428. + (!is_meta_sk(sk) && !sk_can_gso(sk)) || skb->ip_summed == CHECKSUM_NONE) {
  4429. /* Avoid the costly divide in the normal
  4430. * non-TSO case.
  4431. */
  4432. @@ -1037,7 +1093,7 @@
  4433. /* Pcount in the middle of the write queue got changed, we need to do various
  4434. * tweaks to fix counters
  4435. */
  4436. -static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr)
  4437. +void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr)
  4438. {
  4439. struct tcp_sock *tp = tcp_sk(sk);
  4440. @@ -1078,6 +1134,9 @@
  4441. int nlen;
  4442. u8 flags;
  4443. + if (tcp_sk(sk)->mpc && mptcp_is_data_seq(skb))
  4444. + mptcp_fragment(sk, skb, len, mss_now, 0);
  4445. +
  4446. if (WARN_ON(len > skb->len))
  4447. return -EINVAL;
  4448. @@ -1160,7 +1219,7 @@
  4449. * eventually). The difference is that pulled data not copied, but
  4450. * immediately discarded.
  4451. */
  4452. -static void __pskb_trim_head(struct sk_buff *skb, int len)
  4453. +void __pskb_trim_head(struct sk_buff *skb, int len)
  4454. {
  4455. struct skb_shared_info *shinfo;
  4456. int i, k, eat;
  4457. @@ -1201,6 +1260,9 @@
  4458. /* Remove acked data from a packet in the transmit queue. */
  4459. int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
  4460. {
  4461. + if (tcp_sk(sk)->mpc && !is_meta_sk(sk) && mptcp_is_data_seq(skb))
  4462. + return mptcp_trim_head(sk, skb, len);
  4463. +
  4464. if (skb_unclone(skb, GFP_ATOMIC))
  4465. return -ENOMEM;
  4466. @@ -1218,6 +1280,15 @@
  4467. if (tcp_skb_pcount(skb) > 1)
  4468. tcp_set_skb_tso_segs(sk, skb, tcp_skb_mss(skb));
  4469. +#ifdef CONFIG_MPTCP
4470. + /* Some data got acked - we assume the sequence numbers reached the dest.
4471. + * The MPTCP option has already been trimmed above, so it is lost here.
4472. + * Only remove the SEQ flag if the call does not come from a meta retransmit.
4473. + */
  4474. + if (tcp_sk(sk)->mpc && !is_meta_sk(sk))
  4475. + TCP_SKB_CB(skb)->mptcp_flags &= ~MPTCPHDR_SEQ;
  4476. +#endif
  4477. +
  4478. return 0;
  4479. }
  4480. @@ -1377,7 +1448,7 @@
  4481. }
  4482. /* Congestion window validation. (RFC2861) */
  4483. -static void tcp_cwnd_validate(struct sock *sk)
  4484. +void tcp_cwnd_validate(struct sock *sk)
  4485. {
  4486. struct tcp_sock *tp = tcp_sk(sk);
  4487. @@ -1411,8 +1482,8 @@
  4488. * But we can avoid doing the divide again given we already have
  4489. * skb_pcount = skb->len / mss_now
  4490. */
  4491. -static void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now,
  4492. - const struct sk_buff *skb)
  4493. +void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now,
  4494. + const struct sk_buff *skb)
  4495. {
  4496. if (skb->len < tcp_skb_pcount(skb) * mss_now)
  4497. tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
  4498. @@ -1433,19 +1504,28 @@
  4499. (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
  4500. }
  4501. /* Returns the portion of skb which can be sent right away */
  4502. -static unsigned int tcp_mss_split_point(const struct sock *sk,
  4503. - const struct sk_buff *skb,
  4504. - unsigned int mss_now,
  4505. - unsigned int max_segs,
  4506. - int nonagle)
  4507. +unsigned int tcp_mss_split_point(const struct sock *sk,
  4508. + const struct sk_buff *skb,
  4509. + unsigned int mss_now,
  4510. + unsigned int max_segs,
  4511. + int nonagle)
  4512. {
  4513. const struct tcp_sock *tp = tcp_sk(sk);
  4514. + const struct sock *meta_sk = tp->mpc ? mptcp_meta_sk(sk) : sk;
  4515. u32 partial, needed, window, max_len;
  4516. - window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
  4517. + if (!tp->mpc)
  4518. + window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
  4519. + else
  4520. + /* We need to evaluate the available space in the sending window
  4521. + * at the subflow level. However, the subflow seq has not yet
  4522. + * been set. Nevertheless we know that the caller will set it to
  4523. + * write_seq.
  4524. + */
  4525. + window = tcp_wnd_end(tp) - tp->write_seq;
  4526. max_len = mss_now * max_segs;
  4527. - if (likely(max_len <= window && skb != tcp_write_queue_tail(sk)))
  4528. + if (likely(max_len <= window && skb != tcp_write_queue_tail(meta_sk)))
  4529. return max_len;
  4530. needed = min(skb->len, window);
  4531. @@ -1467,13 +1547,14 @@
  4532. /* Can at least one segment of SKB be sent right now, according to the
  4533. * congestion window rules? If so, return how many segments are allowed.
  4534. */
  4535. -static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
  4536. - const struct sk_buff *skb)
  4537. +unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
  4538. + const struct sk_buff *skb)
  4539. {
  4540. u32 in_flight, cwnd;
  4541. /* Don't be strict about the congestion window for the final FIN. */
  4542. - if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
  4543. + if (skb &&
  4544. + ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) || mptcp_is_data_fin(skb)) &&
  4545. tcp_skb_pcount(skb) == 1)
  4546. return 1;
  4547. @@ -1489,8 +1570,8 @@
  4548. * This must be invoked the first time we consider transmitting
  4549. * SKB onto the wire.
  4550. */
  4551. -static int tcp_init_tso_segs(const struct sock *sk, struct sk_buff *skb,
  4552. - unsigned int mss_now)
  4553. +int tcp_init_tso_segs(const struct sock *sk, struct sk_buff *skb,
  4554. + unsigned int mss_now)
  4555. {
  4556. int tso_segs = tcp_skb_pcount(skb);
  4557. @@ -1505,8 +1586,8 @@
  4558. /* Return true if the Nagle test allows this packet to be
  4559. * sent now.
  4560. */
  4561. -static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb,
  4562. - unsigned int cur_mss, int nonagle)
  4563. +bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb,
  4564. + unsigned int cur_mss, int nonagle)
  4565. {
  4566. /* Nagle rule does not apply to frames, which sit in the middle of the
  4567. * write_queue (they have no chances to get new data).
  4568. @@ -1518,7 +1599,8 @@
  4569. return true;
  4570. /* Don't use the nagle rule for urgent data (or for the final FIN). */
  4571. - if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
  4572. + if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) ||
  4573. + mptcp_is_data_fin(skb))
  4574. return true;
  4575. if (!tcp_nagle_check(skb->len < cur_mss, tp, cur_mss, nonagle))
  4576. @@ -1528,9 +1610,8 @@
  4577. }
  4578. /* Does at least the first segment of SKB fit into the send window? */
  4579. -static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
  4580. - const struct sk_buff *skb,
  4581. - unsigned int cur_mss)
  4582. +bool tcp_snd_wnd_test(const struct tcp_sock *tp, const struct sk_buff *skb,
  4583. + unsigned int cur_mss)
  4584. {
  4585. u32 end_seq = TCP_SKB_CB(skb)->end_seq;
  4586. @@ -1549,14 +1630,16 @@
  4587. {
  4588. const struct tcp_sock *tp = tcp_sk(sk);
  4589. unsigned int cwnd_quota;
  4590. + const struct sock *meta_sk = tp->mpc ? mptcp_meta_sk(sk) : sk;
  4591. + const struct tcp_sock *meta_tp = tcp_sk(meta_sk);
  4592. - tcp_init_tso_segs(sk, skb, cur_mss);
  4593. + tcp_init_tso_segs(meta_sk, skb, cur_mss);
  4594. - if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
  4595. + if (!tcp_nagle_test(meta_tp, skb, cur_mss, nonagle))
  4596. return 0;
  4597. cwnd_quota = tcp_cwnd_test(tp, skb);
  4598. - if (cwnd_quota && !tcp_snd_wnd_test(tp, skb, cur_mss))
  4599. + if (cwnd_quota && !tcp_snd_wnd_test(meta_tp, skb, cur_mss))
  4600. cwnd_quota = 0;
  4601. return cwnd_quota;
  4602. @@ -1566,12 +1649,16 @@
  4603. bool tcp_may_send_now(struct sock *sk)
  4604. {
  4605. const struct tcp_sock *tp = tcp_sk(sk);
  4606. - struct sk_buff *skb = tcp_send_head(sk);
  4607. + struct sk_buff *skb;
  4608. + const struct sock *meta_sk = tp->mpc ? mptcp_meta_sk(sk) : sk;
  4609. + const struct tcp_sock *meta_tp = tcp_sk(meta_sk);
  4610. +
  4611. + skb = tcp_send_head(meta_sk);
  4612. return skb &&
  4613. tcp_snd_test(sk, skb, tcp_current_mss(sk),
  4614. - (tcp_skb_is_last(sk, skb) ?
  4615. - tp->nonagle : TCP_NAGLE_PUSH));
  4616. + (tcp_skb_is_last(meta_sk, skb) ?
  4617. + meta_tp->nonagle : TCP_NAGLE_PUSH));
  4618. }
  4619. /* Trim TSO SKB to LEN bytes, put the remaining data into a new packet
  4620. @@ -1588,6 +1675,9 @@
  4621. int nlen = skb->len - len;
  4622. u8 flags;
  4623. + if (tcp_sk(sk)->mpc && mptcp_is_data_seq(skb))
  4624. + mptso_fragment(sk, skb, len, mss_now, gfp, 0);
  4625. +
  4626. /* All of a TSO frame must be composed of paged data. */
  4627. if (skb->len != skb->data_len)
  4628. return tcp_fragment(sk, skb, len, mss_now);
  4629. @@ -1633,29 +1723,39 @@
  4630. *
  4631. * This algorithm is from John Heffner.
  4632. */
  4633. -static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
  4634. +bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
  4635. {
  4636. struct tcp_sock *tp = tcp_sk(sk);
  4637. + struct sock *meta_sk = tp->mpc ? mptcp_meta_sk(sk) : sk;
  4638. + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
  4639. const struct inet_connection_sock *icsk = inet_csk(sk);
  4640. u32 send_win, cong_win, limit, in_flight;
  4641. int win_divisor;
  4642. - if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
  4643. + if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) || mptcp_is_data_fin(skb))
  4644. goto send_now;
  4645. if (icsk->icsk_ca_state != TCP_CA_Open)
  4646. goto send_now;
  4647. /* Defer for less than two clock ticks. */
  4648. - if (tp->tso_deferred &&
  4649. - (((u32)jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1)
  4650. + if (meta_tp->tso_deferred &&
  4651. + (((u32)jiffies << 1) >> 1) - (meta_tp->tso_deferred >> 1) > 1)
  4652. goto send_now;
  4653. in_flight = tcp_packets_in_flight(tp);
  4654. BUG_ON(tcp_skb_pcount(skb) <= 1 || (tp->snd_cwnd <= in_flight));
  4655. - send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
  4656. + if (!tp->mpc)
  4657. + send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
  4658. + else
  4659. + /* We need to evaluate the available space in the sending window
  4660. + * at the subflow level. However, the subflow seq has not yet
  4661. + * been set. Nevertheless we know that the caller will set it to
  4662. + * write_seq.
  4663. + */
  4664. + send_win = tcp_wnd_end(tp) - tp->write_seq;
  4665. /* From in_flight test above, we know that cwnd > in_flight. */
  4666. cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache;
  4667. @@ -1668,7 +1768,7 @@
  4668. goto send_now;
  4669. /* Middle in queue won't get any more data, full sendable already? */
  4670. - if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
  4671. + if ((skb != tcp_write_queue_tail(meta_sk)) && (limit >= skb->len))
  4672. goto send_now;
  4673. win_divisor = ACCESS_ONCE(sysctl_tcp_tso_win_divisor);
  4674. @@ -1694,13 +1794,13 @@
  4675. /* Ok, it looks like it is advisable to defer.
  4676. * Do not rearm the timer if already set to not break TCP ACK clocking.
  4677. */
  4678. - if (!tp->tso_deferred)
  4679. - tp->tso_deferred = 1 | (jiffies << 1);
  4680. + if (!meta_tp->tso_deferred)
  4681. + meta_tp->tso_deferred = 1 | (jiffies << 1);
  4682. return true;
  4683. send_now:
  4684. - tp->tso_deferred = 0;
  4685. + meta_tp->tso_deferred = 0;
  4686. return false;
  4687. }
  4688. @@ -1713,7 +1813,7 @@
  4689. * 1 if a probe was sent,
  4690. * -1 otherwise
  4691. */
  4692. -static int tcp_mtu_probe(struct sock *sk)
  4693. +int tcp_mtu_probe(struct sock *sk)
  4694. {
  4695. struct tcp_sock *tp = tcp_sk(sk);
  4696. struct inet_connection_sock *icsk = inet_csk(sk);
  4697. @@ -1858,6 +1958,9 @@
  4698. int cwnd_quota;
  4699. int result;
  4700. + if (is_meta_sk(sk))
  4701. + return mptcp_write_xmit(sk, mss_now, nonagle, push_one, gfp);
  4702. +
  4703. sent_pkts = 0;
  4704. if (!push_one) {
  4705. @@ -2314,6 +2417,10 @@
  4706. if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
  4707. return;
  4708. + /* Currently not supported for MPTCP - but it should be possible */
  4709. + if (tp->mpc)
  4710. + return;
  4711. +
  4712. tcp_for_write_queue_from_safe(skb, tmp, sk) {
  4713. if (!tcp_can_collapse(sk, skb))
  4714. break;
  4715. @@ -2411,10 +2518,26 @@
  4716. */
  4717. if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) ||
  4718. skb_headroom(skb) >= 0xFFFF)) {
  4719. - struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER,
  4720. - GFP_ATOMIC);
  4721. + struct sk_buff *nskb;
  4722. +
  4723. + if (mptcp_is_data_seq(skb))
  4724. + skb_push(skb, MPTCP_SUB_LEN_DSS_ALIGN +
  4725. + MPTCP_SUB_LEN_ACK_ALIGN +
  4726. + MPTCP_SUB_LEN_SEQ_ALIGN);
  4727. +
  4728. + nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
  4729. +
  4730. + if (mptcp_is_data_seq(skb)) {
  4731. + skb_pull(skb, MPTCP_SUB_LEN_DSS_ALIGN +
  4732. + MPTCP_SUB_LEN_ACK_ALIGN +
  4733. + MPTCP_SUB_LEN_SEQ_ALIGN);
  4734. + if (nskb)
  4735. + skb_pull(nskb, MPTCP_SUB_LEN_DSS_ALIGN +
  4736. + MPTCP_SUB_LEN_ACK_ALIGN +
  4737. + MPTCP_SUB_LEN_SEQ_ALIGN);
  4738. + }
  4739. err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
  4740. - -ENOBUFS;
  4741. + -ENOBUFS;
  4742. } else {
  4743. err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
  4744. }
  4745. @@ -2665,6 +2788,11 @@
  4746. {
  4747. struct sk_buff *skb;
  4748. + if (is_meta_sk(sk)) {
  4749. + mptcp_send_active_reset(sk, priority);
  4750. + return;
  4751. + }
  4752. +
  4753. /* NOTE: No TCP options attached and we never retransmit this. */
  4754. skb = alloc_skb(MAX_TCP_HEADER, priority);
  4755. if (!skb) {
  4756. @@ -2767,14 +2895,14 @@
  4757. (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0))
  4758. req->window_clamp = tcp_full_space(sk);
  4759. - /* tcp_full_space because it is guaranteed to be the first packet */
  4760. - tcp_select_initial_window(tcp_full_space(sk),
  4761. - mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
  4762. + tp->select_initial_window(tcp_full_space(sk),
  4763. + mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) -
  4764. + (tcp_rsk(req)->saw_mpc ? MPTCP_SUB_LEN_DSM_ALIGN : 0),
  4765. &req->rcv_wnd,
  4766. &req->window_clamp,
  4767. ireq->wscale_ok,
  4768. &rcv_wscale,
  4769. - dst_metric(dst, RTAX_INITRWND));
  4770. + dst_metric(dst, RTAX_INITRWND), sk);
  4771. ireq->rcv_wscale = rcv_wscale;
  4772. }
  4773. @@ -2810,7 +2938,7 @@
  4774. /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
  4775. th->window = htons(min(req->rcv_wnd, 65535U));
  4776. - tcp_options_write((__be32 *)(th + 1), tp, &opts);
  4777. + tcp_options_write((__be32 *)(th + 1), tp, &opts, skb);
  4778. th->doff = (tcp_header_size >> 2);
  4779. TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, tcp_skb_pcount(skb));
  4780. @@ -2866,13 +2994,13 @@
  4781. (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
  4782. tp->window_clamp = tcp_full_space(sk);
  4783. - tcp_select_initial_window(tcp_full_space(sk),
  4784. + tp->select_initial_window(tcp_full_space(sk),
  4785. tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
  4786. &tp->rcv_wnd,
  4787. &tp->window_clamp,
  4788. sysctl_tcp_window_scaling,
  4789. &rcv_wscale,
  4790. - dst_metric(dst, RTAX_INITRWND));
  4791. + dst_metric(dst, RTAX_INITRWND), sk);
  4792. tp->rx_opt.rcv_wscale = rcv_wscale;
  4793. tp->rcv_ssthresh = tp->rcv_wnd;
  4794. @@ -2896,6 +3024,38 @@
  4795. inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
  4796. inet_csk(sk)->icsk_retransmits = 0;
  4797. tcp_clear_retrans(tp);
  4798. +
  4799. +#ifdef CONFIG_MPTCP
  4800. + if (sysctl_mptcp_enabled && mptcp_doit(sk)) {
  4801. + if (is_master_tp(tp)) {
  4802. + tp->request_mptcp = 1;
  4803. + mptcp_connect_init(sk);
  4804. + } else if (tp->mptcp) {
  4805. + struct inet_sock *inet = inet_sk(sk);
  4806. +
  4807. + tp->mptcp->snt_isn = tp->write_seq;
  4808. + tp->mptcp->init_rcv_wnd = tp->rcv_wnd;
  4809. +
  4810. + /* Set nonce for new subflows */
  4811. + if (sk->sk_family == AF_INET)
  4812. + tp->mptcp->mptcp_loc_nonce = mptcp_v4_get_nonce(
  4813. + inet->inet_saddr,
  4814. + inet->inet_daddr,
  4815. + inet->inet_sport,
  4816. + inet->inet_dport,
  4817. + tp->write_seq);
  4818. +#if IS_ENABLED(CONFIG_IPV6)
  4819. + else
  4820. + tp->mptcp->mptcp_loc_nonce = mptcp_v6_get_nonce(
  4821. + inet6_sk(sk)->saddr.s6_addr32,
  4822. + sk->sk_v6_daddr.s6_addr32,
  4823. + inet->inet_sport,
  4824. + inet->inet_dport,
  4825. + tp->write_seq);
  4826. +#endif
  4827. + }
  4828. + }
  4829. +#endif
  4830. }
  4831. static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
  4832. @@ -3131,6 +3291,7 @@
  4833. TCP_SKB_CB(buff)->when = tcp_time_stamp;
  4834. tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC));
  4835. }
  4836. +EXPORT_SYMBOL(tcp_send_ack);
  4837. /* This routine sends a packet with an out of date sequence
  4838. * number. It assumes the other end will try to ack it.
  4839. @@ -3143,7 +3304,7 @@
  4840. * one is with SEG.SEQ=SND.UNA to deliver urgent pointer, another is
  4841. * out-of-date with SND.UNA-1 to probe window.
  4842. */
  4843. -static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
  4844. +int tcp_xmit_probe_skb(struct sock *sk, int urgent)
  4845. {
  4846. struct tcp_sock *tp = tcp_sk(sk);
  4847. struct sk_buff *skb;
  4848. @@ -3181,6 +3342,9 @@
  4849. if (sk->sk_state == TCP_CLOSE)
  4850. return -1;
  4851. + if (is_meta_sk(sk))
  4852. + return mptcp_write_wakeup(sk);
  4853. +
  4854. if ((skb = tcp_send_head(sk)) != NULL &&
  4855. before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
  4856. int err;
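/* Editorial aside (not part of the original patch): the tcp_output.c hunks
 * above route window selection through per-socket hooks (tp->select_window,
 * tp->__select_window, tp->select_initial_window) so that MPTCP can install
 * meta-level variants. Where the hooks get assigned is outside this excerpt;
 * the initialisation below is only an assumption for illustration, using the
 * plain-TCP functions that these hunks make visible.
 */
static void example_init_window_hooks(struct tcp_sock *tp)
{
	tp->select_initial_window = tcp_select_initial_window;
	tp->select_window = tcp_select_window;
	tp->__select_window = __tcp_select_window;
}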
  4857. diff -Nur linux-3.14.45.orig/net/ipv4/tcp_timer.c linux-3.14.45/net/ipv4/tcp_timer.c
  4858. --- linux-3.14.45.orig/net/ipv4/tcp_timer.c 2015-06-23 02:01:36.000000000 +0200
  4859. +++ linux-3.14.45/net/ipv4/tcp_timer.c 2015-06-24 14:15:48.891862483 +0200
  4860. @@ -20,6 +20,7 @@
  4861. #include <linux/module.h>
  4862. #include <linux/gfp.h>
  4863. +#include <net/mptcp.h>
  4864. #include <net/tcp.h>
  4865. int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES;
  4866. @@ -32,7 +33,7 @@
  4867. int sysctl_tcp_orphan_retries __read_mostly;
  4868. int sysctl_tcp_thin_linear_timeouts __read_mostly;
  4869. -static void tcp_write_err(struct sock *sk)
  4870. +void tcp_write_err(struct sock *sk)
  4871. {
  4872. sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
  4873. sk->sk_error_report(sk);
  4874. @@ -124,10 +125,8 @@
  4875. * retransmissions with an initial RTO of TCP_RTO_MIN or TCP_TIMEOUT_INIT if
  4876. * syn_set flag is set.
  4877. */
  4878. -static bool retransmits_timed_out(struct sock *sk,
  4879. - unsigned int boundary,
  4880. - unsigned int timeout,
  4881. - bool syn_set)
  4882. +bool retransmits_timed_out(struct sock *sk, unsigned int boundary,
  4883. + unsigned int timeout, bool syn_set)
  4884. {
  4885. unsigned int linear_backoff_thresh, start_ts;
  4886. unsigned int rto_base = syn_set ? TCP_TIMEOUT_INIT : TCP_RTO_MIN;
  4887. @@ -153,7 +152,7 @@
  4888. }
  4889. /* A write timeout has occurred. Process the after effects. */
  4890. -static int tcp_write_timeout(struct sock *sk)
  4891. +int tcp_write_timeout(struct sock *sk)
  4892. {
  4893. struct inet_connection_sock *icsk = inet_csk(sk);
  4894. struct tcp_sock *tp = tcp_sk(sk);
  4895. @@ -168,6 +167,10 @@
  4896. }
  4897. retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
  4898. syn_set = true;
  4899. + /* Stop retransmitting MP_CAPABLE options in SYN if timed out. */
  4900. + if (tcp_sk(sk)->request_mptcp &&
  4901. + icsk->icsk_retransmits >= mptcp_sysctl_syn_retries())
  4902. + tcp_sk(sk)->request_mptcp = 0;
  4903. } else {
  4904. if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0, 0)) {
  4905. /* Black hole detection */
  4906. @@ -248,18 +251,22 @@
  4907. static void tcp_delack_timer(unsigned long data)
  4908. {
  4909. struct sock *sk = (struct sock *)data;
  4910. + struct tcp_sock *tp = tcp_sk(sk);
  4911. + struct sock *meta_sk = tp->mpc ? mptcp_meta_sk(sk) : sk;
  4912. - bh_lock_sock(sk);
  4913. - if (!sock_owned_by_user(sk)) {
  4914. + bh_lock_sock(meta_sk);
  4915. + if (!sock_owned_by_user(meta_sk)) {
  4916. tcp_delack_timer_handler(sk);
  4917. } else {
  4918. inet_csk(sk)->icsk_ack.blocked = 1;
  4919. - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
  4920. + NET_INC_STATS_BH(sock_net(meta_sk), LINUX_MIB_DELAYEDACKLOCKED);
  4921. /* deleguate our work to tcp_release_cb() */
  4922. if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags))
  4923. sock_hold(sk);
  4924. + if (tp->mpc)
  4925. + mptcp_tsq_flags(sk);
  4926. }
  4927. - bh_unlock_sock(sk);
  4928. + bh_unlock_sock(meta_sk);
  4929. sock_put(sk);
  4930. }
  4931. @@ -421,6 +428,9 @@
  4932. tcp_enter_loss(sk, 0);
  4933. + if (tp->mpc)
  4934. + mptcp_reinject_data(sk, 1);
  4935. +
  4936. if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk)) > 0) {
  4937. /* Retransmission failed because of local congestion,
  4938. * do not backoff.
  4939. @@ -471,6 +481,8 @@
  4940. /* Use normal (exponential) backoff */
  4941. icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
  4942. }
  4943. + if (tp->mpc)
  4944. + mptcp_set_rto(sk);
  4945. inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
  4946. if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1, 0, 0))
  4947. __sk_dst_reset(sk);
  4948. @@ -502,7 +514,10 @@
  4949. break;
  4950. case ICSK_TIME_RETRANS:
  4951. icsk->icsk_pending = 0;
  4952. - tcp_retransmit_timer(sk);
  4953. + if (is_meta_sk(sk))
  4954. + mptcp_retransmit_timer(sk);
  4955. + else
  4956. + tcp_retransmit_timer(sk);
  4957. break;
  4958. case ICSK_TIME_PROBE0:
  4959. icsk->icsk_pending = 0;
  4960. @@ -517,16 +532,19 @@
  4961. static void tcp_write_timer(unsigned long data)
  4962. {
  4963. struct sock *sk = (struct sock *)data;
  4964. + struct sock *meta_sk = tcp_sk(sk)->mpc ? mptcp_meta_sk(sk) : sk;
  4965. - bh_lock_sock(sk);
  4966. - if (!sock_owned_by_user(sk)) {
  4967. + bh_lock_sock(meta_sk);
  4968. + if (!sock_owned_by_user(meta_sk)) {
  4969. tcp_write_timer_handler(sk);
  4970. } else {
  4971. /* deleguate our work to tcp_release_cb() */
  4972. if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags))
  4973. sock_hold(sk);
  4974. + if (tcp_sk(sk)->mpc)
  4975. + mptcp_tsq_flags(sk);
  4976. }
  4977. - bh_unlock_sock(sk);
  4978. + bh_unlock_sock(meta_sk);
  4979. sock_put(sk);
  4980. }
  4981. @@ -563,11 +581,12 @@
  4982. struct sock *sk = (struct sock *) data;
  4983. struct inet_connection_sock *icsk = inet_csk(sk);
  4984. struct tcp_sock *tp = tcp_sk(sk);
  4985. + struct sock *meta_sk = tp->mpc ? mptcp_meta_sk(sk) : sk;
  4986. u32 elapsed;
  4987. /* Only process if socket is not in use. */
  4988. - bh_lock_sock(sk);
  4989. - if (sock_owned_by_user(sk)) {
  4990. + bh_lock_sock(meta_sk);
  4991. + if (sock_owned_by_user(meta_sk)) {
  4992. /* Try again later. */
  4993. inet_csk_reset_keepalive_timer (sk, HZ/20);
  4994. goto out;
  4995. @@ -578,6 +597,29 @@
  4996. goto out;
  4997. }
  4998. + if (tp->send_mp_fclose) {
  4999. + /* MUST do this before tcp_write_timeout, because retrans_stamp
5000. + * may have been reset to 0 elsewhere while we are
5001. + * retransmitting MP_FASTCLOSE. We would then crash, because
5002. + * retransmits_timed_out accesses the meta write-queue.
  5003. + *
  5004. + * We make sure that the timestamp is != 0.
  5005. + */
  5006. + if (!tp->retrans_stamp)
  5007. + tp->retrans_stamp = tcp_time_stamp ? : 1;
  5008. +
  5009. + if (tcp_write_timeout(sk))
  5010. + goto out;
  5011. +
  5012. + tcp_send_ack(sk);
  5013. + icsk->icsk_backoff++;
  5014. + icsk->icsk_retransmits++;
  5015. +
  5016. + icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
  5017. + elapsed = icsk->icsk_rto;
  5018. + goto resched;
  5019. + }
  5020. +
  5021. if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
  5022. if (tp->linger2 >= 0) {
  5023. const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN;
  5024. @@ -639,7 +681,7 @@
  5025. tcp_done(sk);
  5026. out:
  5027. - bh_unlock_sock(sk);
  5028. + bh_unlock_sock(meta_sk);
  5029. sock_put(sk);
  5030. }
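/* Editorial aside (not part of the original patch): the timer hunks above all
 * follow the same pattern - the timer still fires on the subflow socket, but
 * serialises on the meta-socket lock; if the meta is owned by the user, the
 * work is deferred to tcp_release_cb() via tsq_flags, and mptcp_tsq_flags()
 * additionally marks the subflow. A condensed sketch of that pattern:
 */
static void example_subflow_timer(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sock *meta_sk = tp->mpc ? mptcp_meta_sk(sk) : sk;

	bh_lock_sock(meta_sk);
	if (!sock_owned_by_user(meta_sk)) {
		tcp_write_timer_handler(sk);	/* run the handler directly */
	} else {
		/* defer the work to tcp_release_cb() */
		if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &tp->tsq_flags))
			sock_hold(sk);
		if (tp->mpc)
			mptcp_tsq_flags(sk);
	}
	bh_unlock_sock(meta_sk);
	sock_put(sk);
}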
  5031. diff -Nur linux-3.14.45.orig/net/ipv6/addrconf.c linux-3.14.45/net/ipv6/addrconf.c
  5032. --- linux-3.14.45.orig/net/ipv6/addrconf.c 2015-06-23 02:01:36.000000000 +0200
  5033. +++ linux-3.14.45/net/ipv6/addrconf.c 2015-06-24 14:15:48.891862483 +0200
  5034. @@ -765,6 +765,7 @@
  5035. kfree_rcu(ifp, rcu);
  5036. }
  5037. +EXPORT_SYMBOL(inet6_ifa_finish_destroy);
  5038. static void
  5039. ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp)
  5040. diff -Nur linux-3.14.45.orig/net/ipv6/af_inet6.c linux-3.14.45/net/ipv6/af_inet6.c
  5041. --- linux-3.14.45.orig/net/ipv6/af_inet6.c 2015-06-23 02:01:36.000000000 +0200
  5042. +++ linux-3.14.45/net/ipv6/af_inet6.c 2015-06-24 14:15:48.891862483 +0200
  5043. @@ -97,8 +97,7 @@
  5044. return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
  5045. }
  5046. -static int inet6_create(struct net *net, struct socket *sock, int protocol,
  5047. - int kern)
  5048. +int inet6_create(struct net *net, struct socket *sock, int protocol, int kern)
  5049. {
  5050. struct inet_sock *inet;
  5051. struct ipv6_pinfo *np;
  5052. diff -Nur linux-3.14.45.orig/net/ipv6/inet6_connection_sock.c linux-3.14.45/net/ipv6/inet6_connection_sock.c
  5053. --- linux-3.14.45.orig/net/ipv6/inet6_connection_sock.c 2015-06-23 02:01:36.000000000 +0200
  5054. +++ linux-3.14.45/net/ipv6/inet6_connection_sock.c 2015-06-24 14:15:48.891862483 +0200
  5055. @@ -96,8 +96,8 @@
  5056. /*
  5057. * request_sock (formerly open request) hash tables.
  5058. */
  5059. -static u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport,
  5060. - const u32 rnd, const u32 synq_hsize)
  5061. +u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport,
  5062. + const u32 rnd, const u32 synq_hsize)
  5063. {
  5064. u32 c;
  5065. diff -Nur linux-3.14.45.orig/net/ipv6/syncookies.c linux-3.14.45/net/ipv6/syncookies.c
  5066. --- linux-3.14.45.orig/net/ipv6/syncookies.c 2015-06-23 02:01:36.000000000 +0200
  5067. +++ linux-3.14.45/net/ipv6/syncookies.c 2015-06-24 14:15:48.891862483 +0200
  5068. @@ -181,7 +181,7 @@
  5069. /* check for timestamp cookie support */
  5070. memset(&tcp_opt, 0, sizeof(tcp_opt));
  5071. - tcp_parse_options(skb, &tcp_opt, 0, NULL);
  5072. + tcp_parse_options(skb, &tcp_opt, NULL, 0, NULL);
  5073. if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok))
  5074. goto out;
  5075. @@ -253,10 +253,10 @@
  5076. }
  5077. req->window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
  5078. - tcp_select_initial_window(tcp_full_space(sk), req->mss,
  5079. + tp->select_initial_window(tcp_full_space(sk), req->mss,
  5080. &req->rcv_wnd, &req->window_clamp,
  5081. ireq->wscale_ok, &rcv_wscale,
  5082. - dst_metric(dst, RTAX_INITRWND));
  5083. + dst_metric(dst, RTAX_INITRWND), sk);
  5084. ireq->rcv_wscale = rcv_wscale;
  5085. diff -Nur linux-3.14.45.orig/net/ipv6/tcp_ipv6.c linux-3.14.45/net/ipv6/tcp_ipv6.c
  5086. --- linux-3.14.45.orig/net/ipv6/tcp_ipv6.c 2015-06-23 02:01:36.000000000 +0200
  5087. +++ linux-3.14.45/net/ipv6/tcp_ipv6.c 2015-06-24 14:44:57.517799806 +0200
  5088. @@ -63,6 +63,8 @@
  5089. #include <net/inet_common.h>
  5090. #include <net/secure_seq.h>
  5091. #include <net/tcp_memcontrol.h>
  5092. +#include <net/mptcp.h>
  5093. +#include <net/mptcp_v6.h>
  5094. #include <net/busy_poll.h>
  5095. #include <asm/uaccess.h>
  5096. @@ -73,14 +75,6 @@
  5097. #include <linux/crypto.h>
  5098. #include <linux/scatterlist.h>
  5099. -static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
  5100. -static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
  5101. - struct request_sock *req);
  5102. -
  5103. -static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
  5104. -
  5105. -static const struct inet_connection_sock_af_ops ipv6_mapped;
  5106. -static const struct inet_connection_sock_af_ops ipv6_specific;
  5107. #ifdef CONFIG_TCP_MD5SIG
  5108. static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
  5109. static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
  5110. @@ -92,7 +86,7 @@
  5111. }
  5112. #endif
  5113. -static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
  5114. +void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
  5115. {
  5116. struct dst_entry *dst = skb_dst(skb);
  5117. const struct rt6_info *rt = (const struct rt6_info *)dst;
  5118. @@ -104,7 +98,7 @@
  5119. inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum;
  5120. }
  5121. -static void tcp_v6_hash(struct sock *sk)
  5122. +void tcp_v6_hash(struct sock *sk)
  5123. {
  5124. if (sk->sk_state != TCP_CLOSE) {
  5125. if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
  5126. @@ -117,7 +111,7 @@
  5127. }
  5128. }
  5129. -static __u32 tcp_v6_init_sequence(const struct sk_buff *skb)
  5130. +__u32 tcp_v6_init_sequence(const struct sk_buff *skb)
  5131. {
  5132. return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
  5133. ipv6_hdr(skb)->saddr.s6_addr32,
  5134. @@ -125,7 +119,7 @@
  5135. tcp_hdr(skb)->source);
  5136. }
  5137. -static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
  5138. +int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
  5139. int addr_len)
  5140. {
  5141. struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
  5142. @@ -339,7 +333,7 @@
  5143. const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
  5144. const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
  5145. struct ipv6_pinfo *np;
  5146. - struct sock *sk;
  5147. + struct sock *sk, *meta_sk;
  5148. int err;
  5149. struct tcp_sock *tp;
  5150. __u32 seq;
  5151. @@ -359,8 +353,14 @@
  5152. return;
  5153. }
  5154. - bh_lock_sock(sk);
  5155. - if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
  5156. + tp = tcp_sk(sk);
  5157. + if (tp->mpc)
  5158. + meta_sk = mptcp_meta_sk(sk);
  5159. + else
  5160. + meta_sk = sk;
  5161. +
  5162. + bh_lock_sock(meta_sk);
  5163. + if (sock_owned_by_user(meta_sk) && type != ICMPV6_PKT_TOOBIG)
  5164. NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
  5165. if (sk->sk_state == TCP_CLOSE)
  5166. @@ -371,7 +371,6 @@
  5167. goto out;
  5168. }
  5169. - tp = tcp_sk(sk);
  5170. seq = ntohl(th->seq);
  5171. if (sk->sk_state != TCP_LISTEN &&
  5172. !between(seq, tp->snd_una, tp->snd_nxt)) {
  5173. @@ -401,11 +400,15 @@
  5174. goto out;
  5175. tp->mtu_info = ntohl(info);
  5176. - if (!sock_owned_by_user(sk))
  5177. + if (!sock_owned_by_user(meta_sk))
  5178. tcp_v6_mtu_reduced(sk);
  5179. - else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
  5180. + else {
  5181. + if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
  5182. &tp->tsq_flags))
  5183. - sock_hold(sk);
  5184. + sock_hold(sk);
  5185. + if (tp->mpc)
  5186. + mptcp_tsq_flags(sk);
  5187. + }
  5188. goto out;
  5189. }
  5190. @@ -415,7 +418,7 @@
  5191. switch (sk->sk_state) {
  5192. struct request_sock *req, **prev;
  5193. case TCP_LISTEN:
  5194. - if (sock_owned_by_user(sk))
  5195. + if (sock_owned_by_user(meta_sk))
  5196. goto out;
  5197. req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
  5198. @@ -440,7 +443,7 @@
  5199. case TCP_SYN_SENT:
  5200. case TCP_SYN_RECV: /* Cannot happen.
  5201. It can, it SYNs are crossed. --ANK */
  5202. - if (!sock_owned_by_user(sk)) {
  5203. + if (!sock_owned_by_user(meta_sk)) {
  5204. sk->sk_err = err;
  5205. sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
  5206. @@ -450,22 +453,22 @@
  5207. goto out;
  5208. }
  5209. - if (!sock_owned_by_user(sk) && np->recverr) {
  5210. + if (!sock_owned_by_user(meta_sk) && np->recverr) {
  5211. sk->sk_err = err;
  5212. sk->sk_error_report(sk);
  5213. } else
  5214. sk->sk_err_soft = err;
  5215. out:
  5216. - bh_unlock_sock(sk);
  5217. + bh_unlock_sock(meta_sk);
  5218. sock_put(sk);
  5219. }
  5220. -static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
  5221. - struct flowi6 *fl6,
  5222. - struct request_sock *req,
  5223. - u16 queue_mapping)
  5224. +int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
  5225. + struct flowi6 *fl6,
  5226. + struct request_sock *req,
  5227. + u16 queue_mapping)
  5228. {
  5229. struct inet_request_sock *ireq = inet_rsk(req);
  5230. struct ipv6_pinfo *np = inet6_sk(sk);
  5231. @@ -495,7 +498,7 @@
  5232. return err;
  5233. }
  5234. -static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req)
  5235. +int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req)
  5236. {
  5237. struct flowi6 fl6;
  5238. int res;
  5239. @@ -506,7 +509,7 @@
  5240. return res;
  5241. }
  5242. -static void tcp_v6_reqsk_destructor(struct request_sock *req)
  5243. +void tcp_v6_reqsk_destructor(struct request_sock *req)
  5244. {
  5245. kfree_skb(inet_rsk(req)->pktopts);
  5246. }
  5247. @@ -719,16 +722,16 @@
  5248. };
  5249. #ifdef CONFIG_TCP_MD5SIG
  5250. -static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
  5251. +const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
  5252. .md5_lookup = tcp_v6_reqsk_md5_lookup,
  5253. .calc_md5_hash = tcp_v6_md5_hash_skb,
  5254. };
  5255. #endif
  5256. -static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
  5257. - u32 tsval, u32 tsecr,
  5258. +static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack,
  5259. + u32 data_ack, u32 win, u32 tsval, u32 tsecr,
  5260. struct tcp_md5sig_key *key, int rst, u8 tclass,
  5261. - u32 label)
  5262. + u32 label, int mptcp)
  5263. {
  5264. const struct tcphdr *th = tcp_hdr(skb);
  5265. struct tcphdr *t1;
  5266. @@ -746,7 +749,10 @@
  5267. if (key)
  5268. tot_len += TCPOLEN_MD5SIG_ALIGNED;
  5269. #endif
  5270. -
  5271. +#ifdef CONFIG_MPTCP
  5272. + if (mptcp)
  5273. + tot_len += MPTCP_SUB_LEN_DSS + MPTCP_SUB_LEN_ACK;
  5274. +#endif
  5275. buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
  5276. GFP_ATOMIC);
  5277. if (buff == NULL)
  5278. @@ -784,6 +790,17 @@
  5279. tcp_v6_md5_hash_hdr((__u8 *)topt, key,
  5280. &ipv6_hdr(skb)->saddr,
  5281. &ipv6_hdr(skb)->daddr, t1);
  5282. + topt += 4;
  5283. + }
  5284. +#endif
  5285. +#ifdef CONFIG_MPTCP
  5286. + if (mptcp) {
  5287. + /* Construction of 32-bit data_ack */
  5288. + *topt++ = htonl((TCPOPT_MPTCP << 24) |
  5289. + ((MPTCP_SUB_LEN_DSS + MPTCP_SUB_LEN_ACK) << 16) |
  5290. + (0x20 << 8) |
  5291. + (0x01));
  5292. + *topt++ = htonl(data_ack);
  5293. }
  5294. #endif
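/* Editorial aside (not part of the original patch): a breakdown of the 32-bit
 * option word built above, assuming TCPOPT_MPTCP is the IANA MPTCP option
 * kind (30) and that MPTCP_SUB_LEN_DSS + MPTCP_SUB_LEN_ACK is the length of a
 * DSS option carrying only a 32-bit DATA_ACK:
 *
 *	byte 0: TCPOPT_MPTCP				(option kind)
 *	byte 1: MPTCP_SUB_LEN_DSS + MPTCP_SUB_LEN_ACK	(option length)
 *	byte 2: 0x20  -> subtype DSS (0x2) in the upper nibble
 *	byte 3: 0x01  -> flag A: DATA_ACK present, 4 bytes
 *
 * The following word is the 32-bit data_ack itself in network byte order.
 */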
  5295. @@ -821,7 +838,7 @@
  5296. kfree_skb(buff);
  5297. }
  5298. -static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
  5299. +void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
  5300. {
  5301. const struct tcphdr *th = tcp_hdr(skb);
  5302. u32 seq = 0, ack_seq = 0;
  5303. @@ -876,7 +893,7 @@
  5304. ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
  5305. (th->doff << 2);
  5306. - tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, key, 1, 0, 0);
  5307. + tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, 0, key, 1, 0, 0, 0);
  5308. #ifdef CONFIG_TCP_MD5SIG
  5309. release_sk1:
  5310. @@ -887,40 +904,48 @@
  5311. #endif
  5312. }
  5313. -static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
  5314. +static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 data_ack,
  5315. u32 win, u32 tsval, u32 tsecr,
  5316. - struct tcp_md5sig_key *key, u8 tclass,
  5317. - u32 label)
  5318. + struct tcp_md5sig_key *key, u8 tclass, u32 label,
  5319. + int mptcp)
  5320. {
  5321. tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, key, 0, tclass,
  5322. - label);
  5323. + label, mptcp);
  5324. }
  5325. static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
  5326. {
  5327. struct inet_timewait_sock *tw = inet_twsk(sk);
  5328. struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
  5329. + u32 data_ack = 0;
  5330. + int mptcp = 0;
  5331. +
  5332. + if (tcptw->mptcp_tw && tcptw->mptcp_tw->meta_tw) {
  5333. + data_ack = (u32)tcptw->mptcp_tw->rcv_nxt;
  5334. + mptcp = 1;
  5335. + }
  5336. tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
  5337. + data_ack,
  5338. tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
  5339. tcp_time_stamp + tcptw->tw_ts_offset,
  5340. tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw),
  5341. - tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel));
  5342. + tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), mptcp);
  5343. inet_twsk_put(tw);
  5344. }
  5345. -static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
  5346. +void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
  5347. struct request_sock *req)
  5348. {
  5349. tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1,
  5350. - req->rcv_wnd, tcp_time_stamp, req->ts_recent,
  5351. + 0, req->rcv_wnd, tcp_time_stamp, req->ts_recent,
  5352. tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
  5353. - 0, 0);
  5354. + 0, 0, 0);
  5355. }
  5356. -static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
  5357. +struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
  5358. {
  5359. struct request_sock *req, **prev;
  5360. const struct tcphdr *th = tcp_hdr(skb);
  5361. @@ -939,7 +964,13 @@
  5362. if (nsk) {
  5363. if (nsk->sk_state != TCP_TIME_WAIT) {
5364. + /* Don't lock the meta-sk again. It has already been
5365. + * locked before mptcp_v6_do_rcv.
5366. + */
  5367. + if (tcp_sk(nsk)->mpc && !is_meta_sk(sk))
  5368. + bh_lock_sock(mptcp_meta_sk(nsk));
  5369. bh_lock_sock(nsk);
  5370. +
  5371. return nsk;
  5372. }
  5373. inet_twsk_put(inet_twsk(nsk));
  5374. @@ -959,6 +990,7 @@
  5375. static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
  5376. {
  5377. struct tcp_options_received tmp_opt;
  5378. + struct mptcp_options_received mopt;
  5379. struct request_sock *req;
  5380. struct inet_request_sock *ireq;
  5381. struct ipv6_pinfo *np = inet6_sk(sk);
  5382. @@ -971,6 +1003,23 @@
  5383. if (skb->protocol == htons(ETH_P_IP))
  5384. return tcp_v4_conn_request(sk, skb);
  5385. + tcp_clear_options(&tmp_opt);
  5386. + tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
  5387. + tmp_opt.user_mss = tp->rx_opt.user_mss;
  5388. + mptcp_init_mp_opt(&mopt);
  5389. + tcp_parse_options(skb, &tmp_opt, &mopt, 0, NULL);
  5390. +
  5391. +#ifdef CONFIG_MPTCP
5392. + /* MPTCP structures not initialized - clear the options and fall back to regular TCP */
  5393. + if (mptcp_init_failed)
  5394. + mptcp_init_mp_opt(&mopt);
  5395. +
  5396. + if (mopt.is_mp_join)
  5397. + return mptcp_do_join_short(skb, &mopt, &tmp_opt, sock_net(sk));
  5398. + if (mopt.drop_me)
  5399. + goto drop;
  5400. +#endif
  5401. +
  5402. if (!ipv6_unicast_destination(skb))
  5403. goto drop;
  5404. @@ -986,7 +1035,22 @@
  5405. goto drop;
  5406. }
  5407. - req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
  5408. +#ifdef CONFIG_MPTCP
  5409. + if (sysctl_mptcp_enabled == MPTCP_APP && !tp->mptcp_enabled)
  5410. + mopt.saw_mpc = 0;
  5411. + if (mopt.saw_mpc && !want_cookie) {
  5412. + req = inet6_reqsk_alloc(&mptcp6_request_sock_ops);
  5413. +
  5414. + if (req == NULL)
  5415. + goto drop;
  5416. +
  5417. + mptcp_rsk(req)->mpcb = NULL;
  5418. + mptcp_rsk(req)->dss_csum = mopt.dss_csum;
  5419. + mptcp_rsk(req)->collide_tk.pprev = NULL;
  5420. + } else
  5421. +#endif
  5422. + req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
  5423. +
  5424. if (req == NULL)
  5425. goto drop;
  5426. @@ -994,17 +1058,15 @@
  5427. tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops;
  5428. #endif
  5429. - tcp_clear_options(&tmp_opt);
  5430. - tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
  5431. - tmp_opt.user_mss = tp->rx_opt.user_mss;
  5432. - tcp_parse_options(skb, &tmp_opt, 0, NULL);
  5433. -
  5434. if (want_cookie && !tmp_opt.saw_tstamp)
  5435. tcp_clear_options(&tmp_opt);
  5436. tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
  5437. tcp_openreq_init(req, &tmp_opt, skb);
  5438. + if (mopt.saw_mpc && !want_cookie)
  5439. + mptcp_reqsk_new_mptcp(req, &tmp_opt, &mopt, skb);
  5440. +
  5441. ireq = inet_rsk(req);
  5442. ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
  5443. ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
  5444. @@ -1094,9 +1156,9 @@
  5445. return 0; /* don't send reset */
  5446. }
  5447. -static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
  5448. - struct request_sock *req,
  5449. - struct dst_entry *dst)
  5450. +struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
  5451. + struct request_sock *req,
  5452. + struct dst_entry *dst)
  5453. {
  5454. struct inet_request_sock *ireq;
  5455. struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
  5456. @@ -1317,7 +1379,7 @@
  5457. * This is because we cannot sleep with the original spinlock
  5458. * held.
  5459. */
  5460. -static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
  5461. +int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
  5462. {
  5463. struct ipv6_pinfo *np = inet6_sk(sk);
  5464. struct tcp_sock *tp;
  5465. @@ -1339,6 +1401,9 @@
  5466. goto discard;
  5467. #endif
  5468. + if (is_meta_sk(sk))
  5469. + return mptcp_v6_do_rcv(sk, skb);
  5470. +
  5471. if (sk_filter(sk, skb))
  5472. goto discard;
  5473. @@ -1460,7 +1525,7 @@
  5474. {
  5475. const struct tcphdr *th;
  5476. const struct ipv6hdr *hdr;
  5477. - struct sock *sk;
  5478. + struct sock *sk, *meta_sk = NULL;
  5479. int ret;
  5480. struct net *net = dev_net(skb->dev);
  5481. @@ -1491,18 +1556,43 @@
  5482. TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
  5483. skb->len - th->doff*4);
  5484. TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
  5485. +#ifdef CONFIG_MPTCP
  5486. + TCP_SKB_CB(skb)->mptcp_flags = 0;
  5487. + TCP_SKB_CB(skb)->dss_off = 0;
  5488. +#endif
  5489. TCP_SKB_CB(skb)->when = 0;
  5490. TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
  5491. TCP_SKB_CB(skb)->sacked = 0;
  5492. sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
  5493. - if (!sk)
  5494. - goto no_tcp_socket;
  5495. process:
  5496. - if (sk->sk_state == TCP_TIME_WAIT)
  5497. + if (sk && sk->sk_state == TCP_TIME_WAIT)
  5498. goto do_time_wait;
  5499. +#ifdef CONFIG_MPTCP
  5500. + if (!sk && th->syn && !th->ack) {
  5501. + int ret = mptcp_lookup_join(skb, NULL);
  5502. +
  5503. + if (ret < 0) {
  5504. + tcp_v6_send_reset(NULL, skb);
  5505. + goto discard_it;
  5506. + } else if (ret > 0) {
  5507. + return 0;
  5508. + }
  5509. + }
  5510. +
5511. + /* Is there a pending request sock for this segment? */
  5512. + if ((!sk || sk->sk_state == TCP_LISTEN) && mptcp_check_req(skb, net)) {
  5513. + if (sk)
  5514. + sock_put(sk);
  5515. + return 0;
  5516. + }
  5517. +#endif
  5518. +
  5519. + if (!sk)
  5520. + goto no_tcp_socket;
  5521. +
  5522. if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
  5523. NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
  5524. goto discard_and_relse;
  5525. @@ -1517,11 +1607,21 @@
  5526. sk_mark_napi_id(sk, skb);
  5527. skb->dev = NULL;
  5528. - bh_lock_sock_nested(sk);
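+ /* For an MPTCP subflow all locking is done on the meta-socket. If the
+ * meta-sk is currently owned by the user, remember the subflow in
+ * skb->sk so that backlog processing (mptcp_backlog_rcv) can hand the
+ * skb back to the right subflow later.
+ */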
  5529. + if (tcp_sk(sk)->mpc) {
  5530. + meta_sk = mptcp_meta_sk(sk);
  5531. +
  5532. + bh_lock_sock_nested(meta_sk);
  5533. + if (sock_owned_by_user(meta_sk))
  5534. + skb->sk = sk;
  5535. + } else {
  5536. + meta_sk = sk;
  5537. + bh_lock_sock_nested(sk);
  5538. + }
  5539. +
  5540. ret = 0;
  5541. - if (!sock_owned_by_user(sk)) {
  5542. + if (!sock_owned_by_user(meta_sk)) {
  5543. #ifdef CONFIG_NET_DMA
  5544. - struct tcp_sock *tp = tcp_sk(sk);
  5545. + struct tcp_sock *tp = tcp_sk(meta_sk);
  5546. if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
  5547. tp->ucopy.dma_chan = net_dma_find_channel();
  5548. if (tp->ucopy.dma_chan)
  5549. @@ -1529,16 +1629,17 @@
  5550. else
  5551. #endif
  5552. {
  5553. - if (!tcp_prequeue(sk, skb))
  5554. + if (!tcp_prequeue(meta_sk, skb))
  5555. ret = tcp_v6_do_rcv(sk, skb);
  5556. }
  5557. - } else if (unlikely(sk_add_backlog(sk, skb,
  5558. - sk->sk_rcvbuf + sk->sk_sndbuf))) {
  5559. - bh_unlock_sock(sk);
  5560. + } else if (unlikely(sk_add_backlog(meta_sk, skb,
  5561. + meta_sk->sk_rcvbuf + meta_sk->sk_sndbuf))) {
  5562. + bh_unlock_sock(meta_sk);
  5563. NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
  5564. goto discard_and_relse;
  5565. }
  5566. - bh_unlock_sock(sk);
  5567. +
  5568. + bh_unlock_sock(meta_sk);
  5569. sock_put(sk);
  5570. return ret ? -1 : 0;
  5571. @@ -1595,6 +1696,18 @@
  5572. sk = sk2;
  5573. goto process;
  5574. }
  5575. +#ifdef CONFIG_MPTCP
  5576. + if (th->syn && !th->ack) {
  5577. + int ret = mptcp_lookup_join(skb, inet_twsk(sk));
  5578. +
  5579. + if (ret < 0) {
  5580. + tcp_v6_send_reset(NULL, skb);
  5581. + goto discard_it;
  5582. + } else if (ret > 0) {
  5583. + return 0;
  5584. + }
  5585. + }
  5586. +#endif
  5587. /* Fall through to ACK */
  5588. }
  5589. case TCP_TW_ACK:
  5590. @@ -1644,13 +1757,13 @@
  5591. }
  5592. }
  5593. -static struct timewait_sock_ops tcp6_timewait_sock_ops = {
  5594. +struct timewait_sock_ops tcp6_timewait_sock_ops = {
  5595. .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
  5596. .twsk_unique = tcp_twsk_unique,
  5597. .twsk_destructor= tcp_twsk_destructor,
  5598. };
  5599. -static const struct inet_connection_sock_af_ops ipv6_specific = {
  5600. +const struct inet_connection_sock_af_ops ipv6_specific = {
  5601. .queue_xmit = inet6_csk_xmit,
  5602. .send_check = tcp_v6_send_check,
  5603. .rebuild_header = inet6_sk_rebuild_header,
  5604. @@ -1683,7 +1796,7 @@
  5605. * TCP over IPv4 via INET6 API
  5606. */
  5607. -static const struct inet_connection_sock_af_ops ipv6_mapped = {
  5608. +const struct inet_connection_sock_af_ops ipv6_mapped = {
  5609. .queue_xmit = ip_queue_xmit,
  5610. .send_check = tcp_v4_send_check,
  5611. .rebuild_header = inet_sk_rebuild_header,
  5612. @@ -1729,7 +1842,7 @@
  5613. return 0;
  5614. }
  5615. -static void tcp_v6_destroy_sock(struct sock *sk)
  5616. +void tcp_v6_destroy_sock(struct sock *sk)
  5617. {
  5618. tcp_v4_destroy_sock(sk);
  5619. inet6_destroy_sock(sk);
  5620. diff -Nur linux-3.14.45.orig/net/mptcp/Kconfig linux-3.14.45/net/mptcp/Kconfig
  5621. --- linux-3.14.45.orig/net/mptcp/Kconfig 1970-01-01 01:00:00.000000000 +0100
  5622. +++ linux-3.14.45/net/mptcp/Kconfig 2015-06-24 14:15:48.891862483 +0200
  5623. @@ -0,0 +1,58 @@
  5624. +#
  5625. +# MPTCP configuration
  5626. +#
  5627. +config MPTCP
  5628. + bool "MPTCP protocol"
  5629. + depends on (IPV6=y || IPV6=n)
  5630. + ---help---
  5631. + This replaces the normal TCP stack with a Multipath TCP stack,
  5632. + able to use several paths at once.
  5633. +
  5634. +menuconfig MPTCP_PM_ADVANCED
  5635. + bool "MPTCP: advanced path-manager control"
  5636. + depends on MPTCP=y
  5637. + ---help---
  5638. + Support for selection of different path-managers. You should choose 'Y' here,
  5639. + because otherwise you will not actively create new MPTCP-subflows.
  5640. +
  5641. +if MPTCP_PM_ADVANCED
  5642. +
  5643. +config MPTCP_FULLMESH
  5644. + tristate "MPTCP Full-Mesh Path-Manager"
  5645. + depends on MPTCP=y
  5646. + ---help---
  5647. + This path-management module will create a full-mesh among all IP-addresses.
  5648. +
  5649. +config MPTCP_NDIFFPORTS
  5650. + tristate "MPTCP ndiff-ports"
  5651. + depends on MPTCP=y
  5652. + ---help---
  5653. + This path-management module will create multiple subflows between the same
  5654. + pair of IP-addresses, modifying the source-port. You can set the number
  5655. + of subflows via the mptcp_ndiffports-sysctl.
  5656. +
  5657. +choice
  5658. + prompt "Default MPTCP Path-Manager"
5659. + default DEFAULT_DUMMY
  5660. + help
  5661. + Select the Path-Manager of your choice
  5662. +
  5663. + config DEFAULT_FULLMESH
  5664. + bool "Full mesh" if MPTCP_FULLMESH=y
  5665. +
  5666. + config DEFAULT_NDIFFPORTS
  5667. + bool "ndiff-ports" if MPTCP_NDIFFPORTS=y
  5668. +
  5669. + config DEFAULT_DUMMY
  5670. + bool "Default"
  5671. +
  5672. +endchoice
  5673. +
  5674. +endif
  5675. +
  5676. +config DEFAULT_MPTCP_PM
  5677. + string
  5678. + default "default" if DEFAULT_DUMMY
  5679. + default "fullmesh" if DEFAULT_FULLMESH
  5680. + default "ndiffports" if DEFAULT_NDIFFPORTS
  5681. + default "default"
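+
+# A .config fragment enabling MPTCP with the full-mesh path-manager built
+# in might look roughly like this (illustrative sketch only):
+#
+#	CONFIG_MPTCP=y
+#	CONFIG_MPTCP_PM_ADVANCED=y
+#	CONFIG_MPTCP_FULLMESH=y
+#	CONFIG_DEFAULT_FULLMESH=y
+#	CONFIG_DEFAULT_MPTCP_PM="fullmesh"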
  5682. diff -Nur linux-3.14.45.orig/net/mptcp/Makefile linux-3.14.45/net/mptcp/Makefile
  5683. --- linux-3.14.45.orig/net/mptcp/Makefile 1970-01-01 01:00:00.000000000 +0100
  5684. +++ linux-3.14.45/net/mptcp/Makefile 2015-06-24 14:15:48.891862483 +0200
  5685. @@ -0,0 +1,18 @@
  5686. +#
5687. +# Makefile for MultiPath TCP support code.
  5688. +#
  5689. +#
  5690. +
  5691. +obj-$(CONFIG_MPTCP) += mptcp.o
  5692. +
  5693. +mptcp-y := mptcp_ctrl.o mptcp_ipv4.o mptcp_ofo_queue.o mptcp_pm.o \
  5694. + mptcp_output.o mptcp_input.o
  5695. +
  5696. +obj-$(CONFIG_TCP_CONG_COUPLED) += mptcp_coupled.o
  5697. +obj-$(CONFIG_TCP_CONG_OLIA) += mptcp_olia.o
  5698. +obj-$(CONFIG_TCP_CONG_WVEGAS) += mptcp_wvegas.o
  5699. +obj-$(CONFIG_MPTCP_FULLMESH) += mptcp_fullmesh.o
  5700. +obj-$(CONFIG_MPTCP_NDIFFPORTS) += mptcp_ndiffports.o
  5701. +
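+# Build the IPv6 glue into mptcp.o whenever IPv6 support is enabled;
+# $(subst m,y,...) maps a modular CONFIG_IPV6=m to "y" as well.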
  5702. +mptcp-$(subst m,y,$(CONFIG_IPV6)) += mptcp_ipv6.o
  5703. +
  5704. diff -Nur linux-3.14.45.orig/net/mptcp/mptcp_coupled.c linux-3.14.45/net/mptcp/mptcp_coupled.c
  5705. --- linux-3.14.45.orig/net/mptcp/mptcp_coupled.c 1970-01-01 01:00:00.000000000 +0100
  5706. +++ linux-3.14.45/net/mptcp/mptcp_coupled.c 2015-06-24 14:15:48.891862483 +0200
  5707. @@ -0,0 +1,273 @@
  5708. +/*
  5709. + * MPTCP implementation - Coupled Congestion Control
  5710. + *
  5711. + * Initial Design & Implementation:
  5712. + * Sébastien Barré <sebastien.barre@uclouvain.be>
  5713. + *
  5714. + * Current Maintainer & Author:
  5715. + * Christoph Paasch <christoph.paasch@uclouvain.be>
  5716. + *
  5717. + * Additional authors:
  5718. + * Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
  5719. + * Gregory Detal <gregory.detal@uclouvain.be>
  5720. + * Fabien Duchêne <fabien.duchene@uclouvain.be>
  5721. + * Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
  5722. + * Lavkesh Lahngir <lavkesh51@gmail.com>
  5723. + * Andreas Ripke <ripke@neclab.eu>
  5724. + * Vlad Dogaru <vlad.dogaru@intel.com>
  5725. + * Octavian Purdila <octavian.purdila@intel.com>
  5726. + * John Ronan <jronan@tssg.org>
  5727. + * Catalin Nicutar <catalin.nicutar@gmail.com>
  5728. + * Brandon Heller <brandonh@stanford.edu>
  5729. + *
  5730. + *
  5731. + * This program is free software; you can redistribute it and/or
  5732. + * modify it under the terms of the GNU General Public License
  5733. + * as published by the Free Software Foundation; either version
  5734. + * 2 of the License, or (at your option) any later version.
  5735. + */
  5736. +#include <net/tcp.h>
  5737. +#include <net/mptcp.h>
  5738. +
  5739. +#include <linux/module.h>
  5740. +
  5741. +/* Scaling is done in the numerator with alpha_scale_num and in the denominator
  5742. + * with alpha_scale_den.
  5743. + *
  5744. + * To downscale, we just need to use alpha_scale.
  5745. + *
5746. + * We have: alpha_scale = alpha_scale_num - 2 * alpha_scale_den
  5747. + */
  5748. +static int alpha_scale_den = 10;
  5749. +static int alpha_scale_num = 32;
  5750. +static int alpha_scale = 12;
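+
+/* With the defaults above the scale factors are powers of two:
+ * 2^alpha_scale_num / (2^alpha_scale_den)^2 = 2^32 / 2^20 = 2^12,
+ * so shifting right by alpha_scale bits removes exactly the scaling
+ * that the alpha computation introduces.
+ */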
  5751. +
  5752. +struct mptcp_ccc {
  5753. + u64 alpha;
  5754. + bool forced_update;
  5755. +};
  5756. +
  5757. +static inline int mptcp_ccc_sk_can_send(const struct sock *sk)
  5758. +{
  5759. + return mptcp_sk_can_send(sk) && tcp_sk(sk)->srtt;
  5760. +}
  5761. +
  5762. +static inline u64 mptcp_get_alpha(struct sock *meta_sk)
  5763. +{
  5764. + struct mptcp_ccc *mptcp_ccc = inet_csk_ca(meta_sk);
  5765. + return mptcp_ccc->alpha;
  5766. +}
  5767. +
  5768. +static inline void mptcp_set_alpha(struct sock *meta_sk, u64 alpha)
  5769. +{
  5770. + struct mptcp_ccc *mptcp_ccc = inet_csk_ca(meta_sk);
  5771. + mptcp_ccc->alpha = alpha;
  5772. +}
  5773. +
  5774. +static inline u64 mptcp_ccc_scale(u32 val, int scale)
  5775. +{
  5776. + return (u64) val << scale;
  5777. +}
  5778. +
  5779. +static inline bool mptcp_get_forced(struct sock *meta_sk)
  5780. +{
  5781. + struct mptcp_ccc *mptcp_ccc = inet_csk_ca(meta_sk);
  5782. + return mptcp_ccc->forced_update;
  5783. +}
  5784. +
  5785. +static inline void mptcp_set_forced(struct sock *meta_sk, bool force)
  5786. +{
  5787. + struct mptcp_ccc *mptcp_ccc = inet_csk_ca(meta_sk);
  5788. + mptcp_ccc->forced_update = force;
  5789. +}
  5790. +
  5791. +static void mptcp_ccc_recalc_alpha(struct sock *sk)
  5792. +{
  5793. + struct mptcp_cb *mpcb = tcp_sk(sk)->mpcb;
  5794. + struct sock *sub_sk;
  5795. + int best_cwnd = 0, best_rtt = 0, can_send = 0;
  5796. + u64 max_numerator = 0, sum_denominator = 0, alpha = 1;
  5797. +
  5798. + if (!mpcb)
  5799. + return;
  5800. +
  5801. + /* Only one subflow left - fall back to normal reno-behavior
  5802. + * (set alpha to 1) */
  5803. + if (mpcb->cnt_established <= 1)
  5804. + goto exit;
  5805. +
  5806. + /* Do regular alpha-calculation for multiple subflows */
  5807. +
  5808. + /* Find the max numerator of the alpha-calculation */
  5809. + mptcp_for_each_sk(mpcb, sub_sk) {
  5810. + struct tcp_sock *sub_tp = tcp_sk(sub_sk);
  5811. + u64 tmp;
  5812. +
  5813. + if (!mptcp_ccc_sk_can_send(sub_sk))
  5814. + continue;
  5815. +
  5816. + can_send++;
  5817. +
5818. + /* We need to look for the path that provides the max value.
5819. + * Integer overflow is not possible here, because
5820. + * tmp is a u64.
  5821. + */
  5822. + tmp = div64_u64(mptcp_ccc_scale(sub_tp->snd_cwnd,
  5823. + alpha_scale_num), (u64)sub_tp->srtt * sub_tp->srtt);
  5824. +
  5825. + if (tmp >= max_numerator) {
  5826. + max_numerator = tmp;
  5827. + best_cwnd = sub_tp->snd_cwnd;
  5828. + best_rtt = sub_tp->srtt;
  5829. + }
  5830. + }
  5831. +
  5832. + /* No subflow is able to send - we don't care anymore */
  5833. + if (unlikely(!can_send))
  5834. + goto exit;
  5835. +
  5836. + /* Calculate the denominator */
  5837. + mptcp_for_each_sk(mpcb, sub_sk) {
  5838. + struct tcp_sock *sub_tp = tcp_sk(sub_sk);
  5839. +
  5840. + if (!mptcp_ccc_sk_can_send(sub_sk))
  5841. + continue;
  5842. +
  5843. + sum_denominator += div_u64(
  5844. + mptcp_ccc_scale(sub_tp->snd_cwnd,
  5845. + alpha_scale_den) * best_rtt,
  5846. + sub_tp->srtt);
  5847. + }
  5848. + sum_denominator *= sum_denominator;
  5849. + if (unlikely(!sum_denominator)) {
  5850. + pr_err("%s: sum_denominator == 0, cnt_established:%d\n",
  5851. + __func__, mpcb->cnt_established);
  5852. + mptcp_for_each_sk(mpcb, sub_sk) {
  5853. + struct tcp_sock *sub_tp = tcp_sk(sub_sk);
5854. + pr_err("%s: pi:%d, state:%d, rtt:%u, cwnd: %u\n",
  5855. + __func__, sub_tp->mptcp->path_index,
  5856. + sub_sk->sk_state, sub_tp->srtt,
  5857. + sub_tp->snd_cwnd);
  5858. + }
  5859. + }
  5860. +
  5861. + alpha = div64_u64(mptcp_ccc_scale(best_cwnd, alpha_scale_num), sum_denominator);
  5862. +
  5863. + if (unlikely(!alpha))
  5864. + alpha = 1;
  5865. +
  5866. +exit:
  5867. + mptcp_set_alpha(mptcp_meta_sk(sk), alpha);
  5868. +}
  5869. +
  5870. +static void mptcp_ccc_init(struct sock *sk)
  5871. +{
  5872. + if (tcp_sk(sk)->mpc) {
  5873. + mptcp_set_forced(mptcp_meta_sk(sk), 0);
  5874. + mptcp_set_alpha(mptcp_meta_sk(sk), 1);
  5875. + }
5876. + /* If we are not doing MPTCP, behave like reno: return */
  5877. +}
  5878. +
  5879. +static void mptcp_ccc_cwnd_event(struct sock *sk, enum tcp_ca_event event)
  5880. +{
  5881. + if (event == CA_EVENT_LOSS)
  5882. + mptcp_ccc_recalc_alpha(sk);
  5883. +}
  5884. +
  5885. +static void mptcp_ccc_set_state(struct sock *sk, u8 ca_state)
  5886. +{
  5887. + if (!tcp_sk(sk)->mpc)
  5888. + return;
  5889. +
  5890. + mptcp_set_forced(mptcp_meta_sk(sk), 1);
  5891. +}
  5892. +
  5893. +static void mptcp_ccc_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight)
  5894. +{
  5895. + struct tcp_sock *tp = tcp_sk(sk);
  5896. + struct mptcp_cb *mpcb = tp->mpcb;
  5897. + int snd_cwnd;
  5898. +
  5899. + if (!tp->mpc) {
  5900. + tcp_reno_cong_avoid(sk, ack, acked, in_flight);
  5901. + return;
  5902. + }
  5903. +
  5904. + if (!tcp_is_cwnd_limited(sk, in_flight))
  5905. + return;
  5906. +
  5907. + if (tp->snd_cwnd <= tp->snd_ssthresh) {
  5908. + /* In "safe" area, increase. */
  5909. + tcp_slow_start(tp, acked);
  5910. + mptcp_ccc_recalc_alpha(sk);
  5911. + return;
  5912. + }
  5913. +
  5914. + if (mptcp_get_forced(mptcp_meta_sk(sk))) {
  5915. + mptcp_ccc_recalc_alpha(sk);
  5916. + mptcp_set_forced(mptcp_meta_sk(sk), 0);
  5917. + }
  5918. +
  5919. + if (mpcb->cnt_established > 1) {
  5920. + u64 alpha = mptcp_get_alpha(mptcp_meta_sk(sk));
  5921. +
5922. + /* This may happen if, at initialization time, the mpcb was
5923. + * not yet attached to the sock, and initializing alpha
5924. + * therefore failed.
  5925. + */
  5926. + if (unlikely(!alpha))
  5927. + alpha = 1;
  5928. +
  5929. + snd_cwnd = (int) div_u64 ((u64) mptcp_ccc_scale(1, alpha_scale),
  5930. + alpha);
  5931. +
  5932. + /* snd_cwnd_cnt >= max (scale * tot_cwnd / alpha, cwnd)
5933. + * Thus, we select the max value here. */
  5934. + if (snd_cwnd < tp->snd_cwnd)
  5935. + snd_cwnd = tp->snd_cwnd;
  5936. + } else {
  5937. + snd_cwnd = tp->snd_cwnd;
  5938. + }
  5939. +
  5940. + if (tp->snd_cwnd_cnt >= snd_cwnd) {
  5941. + if (tp->snd_cwnd < tp->snd_cwnd_clamp) {
  5942. + tp->snd_cwnd++;
  5943. + mptcp_ccc_recalc_alpha(sk);
  5944. + }
  5945. +
  5946. + tp->snd_cwnd_cnt = 0;
  5947. + } else {
  5948. + tp->snd_cwnd_cnt++;
  5949. + }
  5950. +}
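+
+/* Rough example of the effect (numbers purely illustrative): if the
+ * alpha-derived threshold scale/alpha evaluates to 20 while the subflow's
+ * own snd_cwnd is 10, the window grows by one segment only every 20 ACKs
+ * instead of every 10. Since the threshold is never smaller than snd_cwnd,
+ * a coupled subflow never increases its window faster than plain reno
+ * would on the same path.
+ */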
  5951. +
  5952. +static struct tcp_congestion_ops mptcp_ccc = {
  5953. + .init = mptcp_ccc_init,
  5954. + .ssthresh = tcp_reno_ssthresh,
  5955. + .cong_avoid = mptcp_ccc_cong_avoid,
  5956. + .cwnd_event = mptcp_ccc_cwnd_event,
  5957. + .set_state = mptcp_ccc_set_state,
  5958. + .min_cwnd = tcp_reno_min_cwnd,
  5959. + .owner = THIS_MODULE,
  5960. + .name = "coupled",
  5961. +};
  5962. +
  5963. +static int __init mptcp_ccc_register(void)
  5964. +{
  5965. + BUILD_BUG_ON(sizeof(struct mptcp_ccc) > ICSK_CA_PRIV_SIZE);
  5966. + return tcp_register_congestion_control(&mptcp_ccc);
  5967. +}
  5968. +
  5969. +static void __exit mptcp_ccc_unregister(void)
  5970. +{
  5971. + tcp_unregister_congestion_control(&mptcp_ccc);
  5972. +}
  5973. +
  5974. +module_init(mptcp_ccc_register);
  5975. +module_exit(mptcp_ccc_unregister);
  5976. +
  5977. +MODULE_AUTHOR("Christoph Paasch, Sébastien Barré");
  5978. +MODULE_LICENSE("GPL");
  5979. +MODULE_DESCRIPTION("MPTCP COUPLED CONGESTION CONTROL");
  5980. +MODULE_VERSION("0.1");
  5981. diff -Nur linux-3.14.45.orig/net/mptcp/mptcp_ctrl.c linux-3.14.45/net/mptcp/mptcp_ctrl.c
  5982. --- linux-3.14.45.orig/net/mptcp/mptcp_ctrl.c 1970-01-01 01:00:00.000000000 +0100
  5983. +++ linux-3.14.45/net/mptcp/mptcp_ctrl.c 2015-06-24 14:15:48.891862483 +0200
  5984. @@ -0,0 +1,2270 @@
  5985. +/*
  5986. + * MPTCP implementation - MPTCP-control
  5987. + *
  5988. + * Initial Design & Implementation:
  5989. + * Sébastien Barré <sebastien.barre@uclouvain.be>
  5990. + *
  5991. + * Current Maintainer & Author:
  5992. + * Christoph Paasch <christoph.paasch@uclouvain.be>
  5993. + *
  5994. + * Additional authors:
  5995. + * Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
  5996. + * Gregory Detal <gregory.detal@uclouvain.be>
  5997. + * Fabien Duchêne <fabien.duchene@uclouvain.be>
  5998. + * Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
  5999. + * Lavkesh Lahngir <lavkesh51@gmail.com>
  6000. + * Andreas Ripke <ripke@neclab.eu>
  6001. + * Vlad Dogaru <vlad.dogaru@intel.com>
  6002. + * Octavian Purdila <octavian.purdila@intel.com>
  6003. + * John Ronan <jronan@tssg.org>
  6004. + * Catalin Nicutar <catalin.nicutar@gmail.com>
  6005. + * Brandon Heller <brandonh@stanford.edu>
  6006. + *
  6007. + *
  6008. + * This program is free software; you can redistribute it and/or
  6009. + * modify it under the terms of the GNU General Public License
  6010. + * as published by the Free Software Foundation; either version
  6011. + * 2 of the License, or (at your option) any later version.
  6012. + */
  6013. +
  6014. +#include <net/inet_common.h>
  6015. +#include <net/inet6_hashtables.h>
  6016. +#include <net/ipv6.h>
  6017. +#include <net/ip6_checksum.h>
  6018. +#include <net/mptcp.h>
  6019. +#include <net/mptcp_v4.h>
  6020. +#if IS_ENABLED(CONFIG_IPV6)
  6021. +#include <net/mptcp_v6.h>
  6022. +#endif
  6023. +#include <net/sock.h>
  6024. +#include <net/tcp.h>
  6025. +#include <net/tcp_states.h>
  6026. +#include <net/transp_v6.h>
  6027. +#include <net/xfrm.h>
  6028. +
  6029. +#include <linux/cryptohash.h>
  6030. +#include <linux/kconfig.h>
  6031. +#include <linux/module.h>
  6032. +#include <linux/netpoll.h>
  6033. +#include <linux/list.h>
  6034. +#include <linux/jhash.h>
  6035. +#include <linux/tcp.h>
  6036. +#include <linux/net.h>
  6037. +#include <linux/in.h>
  6038. +#include <linux/random.h>
  6039. +#include <linux/inetdevice.h>
  6040. +#include <linux/workqueue.h>
  6041. +#include <linux/atomic.h>
  6042. +#include <linux/sysctl.h>
  6043. +
  6044. +static struct kmem_cache *mptcp_sock_cache __read_mostly;
  6045. +static struct kmem_cache *mptcp_cb_cache __read_mostly;
  6046. +static struct kmem_cache *mptcp_tw_cache __read_mostly;
  6047. +
  6048. +int sysctl_mptcp_enabled __read_mostly = 1;
  6049. +int sysctl_mptcp_checksum __read_mostly = 1;
  6050. +int sysctl_mptcp_debug __read_mostly;
  6051. +EXPORT_SYMBOL(sysctl_mptcp_debug);
  6052. +int sysctl_mptcp_syn_retries __read_mostly = 3;
  6053. +
  6054. +bool mptcp_init_failed __read_mostly;
  6055. +
  6056. +static int proc_mptcp_path_manager(ctl_table *ctl, int write,
  6057. + void __user *buffer, size_t *lenp,
  6058. + loff_t *ppos)
  6059. +{
  6060. + char val[MPTCP_PM_NAME_MAX];
  6061. + ctl_table tbl = {
  6062. + .data = val,
  6063. + .maxlen = MPTCP_PM_NAME_MAX,
  6064. + };
  6065. + int ret;
  6066. +
  6067. + mptcp_get_default_path_manager(val);
  6068. +
  6069. + ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
  6070. + if (write && ret == 0)
  6071. + ret = mptcp_set_default_path_manager(val);
  6072. + return ret;
  6073. +}
  6074. +
  6075. +static struct ctl_table mptcp_table[] = {
  6076. + {
  6077. + .procname = "mptcp_enabled",
  6078. + .data = &sysctl_mptcp_enabled,
  6079. + .maxlen = sizeof(int),
  6080. + .mode = 0644,
  6081. + .proc_handler = &proc_dointvec
  6082. + },
  6083. + {
  6084. + .procname = "mptcp_checksum",
  6085. + .data = &sysctl_mptcp_checksum,
  6086. + .maxlen = sizeof(int),
  6087. + .mode = 0644,
  6088. + .proc_handler = &proc_dointvec
  6089. + },
  6090. + {
  6091. + .procname = "mptcp_debug",
  6092. + .data = &sysctl_mptcp_debug,
  6093. + .maxlen = sizeof(int),
  6094. + .mode = 0644,
  6095. + .proc_handler = &proc_dointvec
  6096. + },
  6097. + {
  6098. + .procname = "mptcp_syn_retries",
  6099. + .data = &sysctl_mptcp_syn_retries,
  6100. + .maxlen = sizeof(int),
  6101. + .mode = 0644,
  6102. + .proc_handler = &proc_dointvec
  6103. + },
  6104. + {
  6105. + .procname = "mptcp_path_manager",
  6106. + .mode = 0644,
  6107. + .maxlen = MPTCP_PM_NAME_MAX,
  6108. + .proc_handler = proc_mptcp_path_manager,
  6109. + },
  6110. + { }
  6111. +};
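+
+/* This table is presumably registered under "net/mptcp" by the init code
+ * later in this file, so the knobs appear as, e.g. (values illustrative):
+ *
+ *	sysctl net.mptcp.mptcp_enabled=1
+ *	sysctl net.mptcp.mptcp_path_manager=fullmesh
+ */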
  6112. +
  6113. +static inline u32 mptcp_hash_tk(u32 token)
  6114. +{
  6115. + return token % MPTCP_HASH_SIZE;
  6116. +}
  6117. +
  6118. +struct hlist_nulls_head tk_hashtable[MPTCP_HASH_SIZE];
  6119. +EXPORT_SYMBOL(tk_hashtable);
  6120. +
  6121. +/* This second hashtable is needed to retrieve request socks
  6122. + * created as a result of a join request. While the SYN contains
  6123. + * the token, the final ack does not, so we need a separate hashtable
  6124. + * to retrieve the mpcb.
  6125. + */
  6126. +struct list_head mptcp_reqsk_htb[MPTCP_HASH_SIZE];
  6127. +spinlock_t mptcp_reqsk_hlock; /* hashtable protection */
  6128. +
  6129. +/* The following hash table is used to avoid collision of token */
  6130. +static struct hlist_nulls_head mptcp_reqsk_tk_htb[MPTCP_HASH_SIZE];
  6131. +spinlock_t mptcp_tk_hashlock; /* hashtable protection */
  6132. +
  6133. +static int mptcp_reqsk_find_tk(u32 token)
  6134. +{
  6135. + u32 hash = mptcp_hash_tk(token);
  6136. + struct mptcp_request_sock *mtreqsk;
  6137. + const struct hlist_nulls_node *node;
  6138. +
  6139. + hlist_nulls_for_each_entry_rcu(mtreqsk, node,
  6140. + &mptcp_reqsk_tk_htb[hash], collide_tk) {
  6141. + if (token == mtreqsk->mptcp_loc_token)
  6142. + return 1;
  6143. + }
  6144. + return 0;
  6145. +}
  6146. +
  6147. +static void mptcp_reqsk_insert_tk(struct request_sock *reqsk, u32 token)
  6148. +{
  6149. + u32 hash = mptcp_hash_tk(token);
  6150. +
  6151. + hlist_nulls_add_head_rcu(&mptcp_rsk(reqsk)->collide_tk,
  6152. + &mptcp_reqsk_tk_htb[hash]);
  6153. +}
  6154. +
  6155. +static void mptcp_reqsk_remove_tk(struct request_sock *reqsk)
  6156. +{
  6157. + rcu_read_lock();
  6158. + spin_lock(&mptcp_tk_hashlock);
  6159. + hlist_nulls_del_init_rcu(&mptcp_rsk(reqsk)->collide_tk);
  6160. + spin_unlock(&mptcp_tk_hashlock);
  6161. + rcu_read_unlock();
  6162. +}
  6163. +
  6164. +void mptcp_reqsk_destructor(struct request_sock *req)
  6165. +{
  6166. + if (!mptcp_rsk(req)->mpcb) {
  6167. + if (in_softirq()) {
  6168. + mptcp_reqsk_remove_tk(req);
  6169. + } else {
  6170. + rcu_read_lock_bh();
  6171. + spin_lock(&mptcp_tk_hashlock);
  6172. + hlist_nulls_del_init_rcu(&mptcp_rsk(req)->collide_tk);
  6173. + spin_unlock(&mptcp_tk_hashlock);
  6174. + rcu_read_unlock_bh();
  6175. + }
  6176. + } else {
  6177. + mptcp_hash_request_remove(req);
  6178. + }
  6179. +}
  6180. +
  6181. +static void __mptcp_hash_insert(struct tcp_sock *meta_tp, u32 token)
  6182. +{
  6183. + u32 hash = mptcp_hash_tk(token);
  6184. + hlist_nulls_add_head_rcu(&meta_tp->tk_table, &tk_hashtable[hash]);
  6185. + meta_tp->inside_tk_table = 1;
  6186. +}
  6187. +
  6188. +static int mptcp_find_token(u32 token)
  6189. +{
  6190. + u32 hash = mptcp_hash_tk(token);
  6191. + struct tcp_sock *meta_tp;
  6192. + const struct hlist_nulls_node *node;
  6193. +
  6194. + hlist_nulls_for_each_entry_rcu(meta_tp, node, &tk_hashtable[hash], tk_table) {
  6195. + if (token == meta_tp->mptcp_loc_token)
  6196. + return 1;
  6197. + }
  6198. + return 0;
  6199. +}
  6200. +
  6201. +static void mptcp_set_key_reqsk(struct request_sock *req,
  6202. + const struct sk_buff *skb)
  6203. +{
  6204. + struct inet_request_sock *ireq = inet_rsk(req);
  6205. + struct mptcp_request_sock *mtreq = mptcp_rsk(req);
  6206. +
  6207. + if (skb->protocol == htons(ETH_P_IP)) {
  6208. + mtreq->mptcp_loc_key = mptcp_v4_get_key(ip_hdr(skb)->saddr,
  6209. + ip_hdr(skb)->daddr,
  6210. + htons(ireq->ir_num),
  6211. + ireq->ir_rmt_port);
  6212. +#if IS_ENABLED(CONFIG_IPV6)
  6213. + } else {
  6214. + mtreq->mptcp_loc_key = mptcp_v6_get_key(ipv6_hdr(skb)->saddr.s6_addr32,
  6215. + ipv6_hdr(skb)->daddr.s6_addr32,
  6216. + htons(ireq->ir_num),
  6217. + ireq->ir_rmt_port);
  6218. +#endif
  6219. + }
  6220. +
  6221. + mptcp_key_sha1(mtreq->mptcp_loc_key, &mtreq->mptcp_loc_token, NULL);
  6222. +}
  6223. +
  6224. +/* New MPTCP-connection request, prepare a new token for the meta-socket that
  6225. + * will be created in mptcp_check_req_master(), and store the received token.
  6226. + */
  6227. +void mptcp_reqsk_new_mptcp(struct request_sock *req,
  6228. + const struct tcp_options_received *rx_opt,
  6229. + const struct mptcp_options_received *mopt,
  6230. + const struct sk_buff *skb)
  6231. +{
  6232. + struct mptcp_request_sock *mtreq = mptcp_rsk(req);
  6233. +
  6234. + tcp_rsk(req)->saw_mpc = 1;
  6235. +
  6236. + rcu_read_lock();
  6237. + spin_lock(&mptcp_tk_hashlock);
  6238. + do {
  6239. + mptcp_set_key_reqsk(req, skb);
  6240. + } while (mptcp_reqsk_find_tk(mtreq->mptcp_loc_token) ||
  6241. + mptcp_find_token(mtreq->mptcp_loc_token));
  6242. +
  6243. + mptcp_reqsk_insert_tk(req, mtreq->mptcp_loc_token);
  6244. + spin_unlock(&mptcp_tk_hashlock);
  6245. + rcu_read_unlock();
  6246. + mtreq->mptcp_rem_key = mopt->mptcp_key;
  6247. +}
  6248. +
  6249. +static void mptcp_set_key_sk(struct sock *sk)
  6250. +{
  6251. + struct tcp_sock *tp = tcp_sk(sk);
  6252. + struct inet_sock *isk = inet_sk(sk);
  6253. +
  6254. + if (sk->sk_family == AF_INET)
  6255. + tp->mptcp_loc_key = mptcp_v4_get_key(isk->inet_saddr,
  6256. + isk->inet_daddr,
  6257. + isk->inet_sport,
  6258. + isk->inet_dport);
  6259. +#if IS_ENABLED(CONFIG_IPV6)
  6260. + else
  6261. + tp->mptcp_loc_key = mptcp_v6_get_key(inet6_sk(sk)->saddr.s6_addr32,
  6262. + sk->sk_v6_daddr.s6_addr32,
  6263. + isk->inet_sport,
  6264. + isk->inet_dport);
  6265. +#endif
  6266. +
  6267. + mptcp_key_sha1(tp->mptcp_loc_key,
  6268. + &tp->mptcp_loc_token, NULL);
  6269. +}
  6270. +
  6271. +void mptcp_connect_init(struct sock *sk)
  6272. +{
  6273. + struct tcp_sock *tp = tcp_sk(sk);
  6274. +
  6275. + rcu_read_lock_bh();
  6276. + spin_lock(&mptcp_tk_hashlock);
  6277. + do {
  6278. + mptcp_set_key_sk(sk);
  6279. + } while (mptcp_reqsk_find_tk(tp->mptcp_loc_token) ||
  6280. + mptcp_find_token(tp->mptcp_loc_token));
  6281. +
  6282. + __mptcp_hash_insert(tp, tp->mptcp_loc_token);
  6283. + spin_unlock(&mptcp_tk_hashlock);
  6284. + rcu_read_unlock_bh();
  6285. +}
  6286. +
6287. +/* mptcp_hash_find - find the meta-socket belonging to a token.
6288. + * On success, the meta-socket's refcount is incremented; it is the
6289. + * responsibility of the caller to drop it again (sock_put) when
6290. + * releasing the structure.
6291. + */
  6292. +struct sock *mptcp_hash_find(struct net *net, u32 token)
  6293. +{
  6294. + u32 hash = mptcp_hash_tk(token);
  6295. + struct tcp_sock *meta_tp;
  6296. + struct sock *meta_sk = NULL;
  6297. + struct hlist_nulls_node *node;
  6298. +
  6299. + rcu_read_lock();
  6300. + hlist_nulls_for_each_entry_rcu(meta_tp, node, &tk_hashtable[hash],
  6301. + tk_table) {
  6302. + meta_sk = (struct sock *)meta_tp;
  6303. + if (token == meta_tp->mptcp_loc_token &&
  6304. + net_eq(net, sock_net(meta_sk)) &&
  6305. + atomic_inc_not_zero(&meta_sk->sk_refcnt))
  6306. + break;
  6307. + meta_sk = NULL;
  6308. + }
  6309. + rcu_read_unlock();
  6310. + return meta_sk;
  6311. +}
  6312. +
  6313. +void mptcp_hash_remove_bh(struct tcp_sock *meta_tp)
  6314. +{
  6315. + /* remove from the token hashtable */
  6316. + rcu_read_lock_bh();
  6317. + spin_lock(&mptcp_tk_hashlock);
  6318. + hlist_nulls_del_init_rcu(&meta_tp->tk_table);
  6319. + meta_tp->inside_tk_table = 0;
  6320. + spin_unlock(&mptcp_tk_hashlock);
  6321. + rcu_read_unlock_bh();
  6322. +}
  6323. +
  6324. +void mptcp_hash_remove(struct tcp_sock *meta_tp)
  6325. +{
  6326. + rcu_read_lock();
  6327. + spin_lock(&mptcp_tk_hashlock);
  6328. + hlist_nulls_del_init_rcu(&meta_tp->tk_table);
  6329. + meta_tp->inside_tk_table = 0;
  6330. + spin_unlock(&mptcp_tk_hashlock);
  6331. + rcu_read_unlock();
  6332. +}
  6333. +
  6334. +static struct sock *mptcp_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
  6335. + struct request_sock *req,
  6336. + struct dst_entry *dst)
  6337. +{
  6338. +#if IS_ENABLED(CONFIG_IPV6)
  6339. + if (sk->sk_family == AF_INET6)
  6340. + return tcp_v6_syn_recv_sock(sk, skb, req, dst);
  6341. +
  6342. + /* sk->sk_family == AF_INET */
  6343. + if (req->rsk_ops->family == AF_INET6)
  6344. + return mptcp_v6v4_syn_recv_sock(sk, skb, req, dst);
  6345. +#endif
  6346. +
  6347. + /* sk->sk_family == AF_INET && req->rsk_ops->family == AF_INET */
  6348. + return tcp_v4_syn_recv_sock(sk, skb, req, dst);
  6349. +}
  6350. +
  6351. +struct sock *mptcp_select_ack_sock(const struct sock *meta_sk, int copied)
  6352. +{
  6353. + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
  6354. + struct sock *sk, *subsk = NULL;
  6355. + u32 max_data_seq = 0;
6356. + /* max_data_seq is initialized only to silence a compiler warning;
6357. + * whether it holds a valid value is tracked by max_data_seq_set.
  6358. + */
  6359. + short max_data_seq_set = 0;
  6360. + u32 min_time = 0xffffffff;
  6361. +
  6362. + /* How do we select the subflow to send the window-update on?
  6363. + *
6364. + * 1. It has to be in a state where it can send an ack and is
6365. + * operational (pf = 0).
6366. + * 2. It has to be one of the subflows that recently
6367. + * contributed to the received stream
6368. + * (this guarantees a working subflow):
  6369. + * a) its latest data_seq received is after the original
  6370. + * copied_seq.
  6371. + * We select the one with the lowest rtt, so that the
  6372. + * window-update reaches our peer the fastest.
  6373. + * b) if no subflow has this kind of data_seq (e.g., very
  6374. + * strange meta-level retransmissions going on), we take
  6375. + * the subflow who last sent the highest data_seq.
  6376. + */
  6377. + mptcp_for_each_sk(meta_tp->mpcb, sk) {
  6378. + struct tcp_sock *tp = tcp_sk(sk);
  6379. +
  6380. + if (!mptcp_sk_can_send_ack(sk) || tp->pf)
  6381. + continue;
  6382. +
  6383. + /* Select among those who contributed to the
  6384. + * current receive-queue.
  6385. + */
  6386. + if (copied && after(tp->mptcp->last_data_seq, meta_tp->copied_seq - copied)) {
  6387. + if (tp->srtt < min_time) {
  6388. + min_time = tp->srtt;
  6389. + subsk = sk;
  6390. + max_data_seq_set = 0;
  6391. + }
  6392. + continue;
  6393. + }
  6394. +
  6395. + if (!subsk && !max_data_seq_set) {
  6396. + max_data_seq = tp->mptcp->last_data_seq;
  6397. + max_data_seq_set = 1;
  6398. + subsk = sk;
  6399. + }
  6400. +
  6401. + /* Otherwise, take the one with the highest data_seq */
  6402. + if ((!subsk || max_data_seq_set) &&
  6403. + after(tp->mptcp->last_data_seq, max_data_seq)) {
  6404. + max_data_seq = tp->mptcp->last_data_seq;
  6405. + subsk = sk;
  6406. + }
  6407. + }
  6408. +
  6409. + if (!subsk) {
  6410. + mptcp_debug("%s subsk is null, copied %d, cseq %u\n", __func__,
  6411. + copied, meta_tp->copied_seq);
  6412. + mptcp_for_each_sk(meta_tp->mpcb, sk) {
  6413. + struct tcp_sock *tp = tcp_sk(sk);
  6414. + mptcp_debug("%s pi %d state %u last_dseq %u\n",
  6415. + __func__, tp->mptcp->path_index, sk->sk_state,
  6416. + tp->mptcp->last_data_seq);
  6417. + }
  6418. + }
  6419. +
  6420. + return subsk;
  6421. +}
  6422. +EXPORT_SYMBOL(mptcp_select_ack_sock);
  6423. +
  6424. +static void mptcp_sock_def_error_report(struct sock *sk)
  6425. +{
  6426. + struct mptcp_cb *mpcb = tcp_sk(sk)->mpcb;
  6427. +
  6428. + if (!sock_flag(sk, SOCK_DEAD))
  6429. + mptcp_sub_close(sk, 0);
  6430. +
  6431. + if (mpcb->infinite_mapping_rcv || mpcb->infinite_mapping_snd ||
  6432. + mpcb->send_infinite_mapping) {
  6433. + struct sock *meta_sk = mptcp_meta_sk(sk);
  6434. +
  6435. + meta_sk->sk_err = sk->sk_err;
  6436. + meta_sk->sk_err_soft = sk->sk_err_soft;
  6437. +
  6438. + if (!sock_flag(meta_sk, SOCK_DEAD))
  6439. + meta_sk->sk_error_report(meta_sk);
  6440. +
  6441. + tcp_done(meta_sk);
  6442. + }
  6443. +
  6444. + sk->sk_err = 0;
  6445. + return;
  6446. +}
  6447. +
  6448. +static void mptcp_mpcb_put(struct mptcp_cb *mpcb)
  6449. +{
  6450. + if (atomic_dec_and_test(&mpcb->mpcb_refcnt)) {
  6451. + mptcp_cleanup_path_manager(mpcb);
  6452. + kmem_cache_free(mptcp_cb_cache, mpcb);
  6453. + }
  6454. +}
  6455. +
  6456. +static void mptcp_sock_destruct(struct sock *sk)
  6457. +{
  6458. + struct tcp_sock *tp = tcp_sk(sk);
  6459. +
  6460. + inet_sock_destruct(sk);
  6461. +
  6462. + BUG_ON(!list_empty(&tp->mptcp->cb_list));
  6463. +
  6464. + kmem_cache_free(mptcp_sock_cache, tp->mptcp);
  6465. + tp->mptcp = NULL;
  6466. +
  6467. + if (!is_meta_sk(sk) && !tp->was_meta_sk) {
  6468. + /* Taken when mpcb pointer was set */
  6469. + sock_put(mptcp_meta_sk(sk));
  6470. + mptcp_mpcb_put(tp->mpcb);
  6471. + } else {
  6472. + struct mptcp_cb *mpcb = tp->mpcb;
  6473. + struct mptcp_tw *mptw;
  6474. +
  6475. + /* The mpcb is disappearing - we can make the final
  6476. + * update to the rcv_nxt of the time-wait-sock and remove
  6477. + * its reference to the mpcb.
  6478. + */
  6479. + spin_lock_bh(&mpcb->tw_lock);
  6480. + list_for_each_entry_rcu(mptw, &mpcb->tw_list, list) {
  6481. + list_del_rcu(&mptw->list);
  6482. + mptw->in_list = 0;
  6483. + mptcp_mpcb_put(mpcb);
  6484. + rcu_assign_pointer(mptw->mpcb, NULL);
  6485. + }
  6486. + spin_unlock_bh(&mpcb->tw_lock);
  6487. +
  6488. + mptcp_mpcb_put(mpcb);
  6489. +
  6490. + mptcp_debug("%s destroying meta-sk\n", __func__);
  6491. + }
  6492. +}
  6493. +
  6494. +void mptcp_destroy_sock(struct sock *sk)
  6495. +{
  6496. + if (is_meta_sk(sk)) {
  6497. + struct sock *sk_it, *tmpsk;
  6498. +
  6499. + __skb_queue_purge(&tcp_sk(sk)->mpcb->reinject_queue);
  6500. + mptcp_purge_ofo_queue(tcp_sk(sk));
  6501. +
  6502. + /* We have to close all remaining subflows. Normally, they
  6503. + * should all be about to get closed. But, if the kernel is
  6504. + * forcing a closure (e.g., tcp_write_err), the subflows might
  6505. + * not have been closed properly (as we are waiting for the
  6506. + * DATA_ACK of the DATA_FIN).
  6507. + */
  6508. + mptcp_for_each_sk_safe(tcp_sk(sk)->mpcb, sk_it, tmpsk) {
6509. + /* tcp_close() has already been called - we are waiting for
6510. + * the graceful closure, or we are retransmitting a fast-close
6511. + * on the subflow. The reset (or timeout) will kill the
6512. + * subflow.
  6513. + */
  6514. + if (tcp_sk(sk_it)->closing ||
  6515. + tcp_sk(sk_it)->send_mp_fclose)
  6516. + continue;
  6517. +
6518. + /* Let the delayed work run first, to prevent the time-wait state */
  6519. + if (delayed_work_pending(&tcp_sk(sk_it)->mptcp->work))
  6520. + continue;
  6521. +
  6522. + mptcp_sub_close(sk_it, 0);
  6523. + }
  6524. + } else {
  6525. + mptcp_del_sock(sk);
  6526. + }
  6527. +}
  6528. +
  6529. +static void mptcp_set_state(struct sock *sk)
  6530. +{
  6531. + struct sock *meta_sk = mptcp_meta_sk(sk);
  6532. +
  6533. + /* Meta is not yet established - wake up the application */
  6534. + if ((1 << meta_sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV) &&
  6535. + sk->sk_state == TCP_ESTABLISHED) {
  6536. + tcp_set_state(meta_sk, TCP_ESTABLISHED);
  6537. +
  6538. + if (!sock_flag(meta_sk, SOCK_DEAD)) {
  6539. + meta_sk->sk_state_change(meta_sk);
  6540. + sk_wake_async(meta_sk, SOCK_WAKE_IO, POLL_OUT);
  6541. + }
  6542. + }
  6543. +
  6544. + if (sk->sk_state == TCP_ESTABLISHED) {
  6545. + tcp_sk(sk)->mptcp->establish_increased = 1;
  6546. + tcp_sk(sk)->mpcb->cnt_established++;
  6547. + }
  6548. +}
  6549. +
  6550. +u32 mptcp_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned;
  6551. +u32 mptcp_key_seed = 0;
  6552. +
  6553. +void mptcp_key_sha1(u64 key, u32 *token, u64 *idsn)
  6554. +{
  6555. + u32 workspace[SHA_WORKSPACE_WORDS];
  6556. + u32 mptcp_hashed_key[SHA_DIGEST_WORDS];
  6557. + u8 input[64];
  6558. + int i;
  6559. +
  6560. + memset(workspace, 0, sizeof(workspace));
  6561. +
  6562. + /* Initialize input with appropriate padding */
  6563. + memset(&input[9], 0, sizeof(input) - 10); /* -10, because the last byte
  6564. + * is explicitly set too */
  6565. + memcpy(input, &key, sizeof(key)); /* Copy key to the msg beginning */
  6566. + input[8] = 0x80; /* Padding: First bit after message = 1 */
  6567. + input[63] = 0x40; /* Padding: Length of the message = 64 bits */
  6568. +
  6569. + sha_init(mptcp_hashed_key);
  6570. + sha_transform(mptcp_hashed_key, input, workspace);
  6571. +
  6572. + for (i = 0; i < 5; i++)
  6573. + mptcp_hashed_key[i] = cpu_to_be32(mptcp_hashed_key[i]);
  6574. +
  6575. + if (token)
  6576. + *token = mptcp_hashed_key[0];
  6577. + if (idsn)
  6578. + *idsn = *((u64 *)&mptcp_hashed_key[3]);
  6579. +}
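+
+/* Usage sketch: a local 64-bit key is hashed once and the two outputs are
+ * the 32-bit connection token and the 64-bit initial data sequence number:
+ *
+ *	u32 token;
+ *	u64 idsn;
+ *
+ *	mptcp_key_sha1(tp->mptcp_loc_key, &token, &idsn);
+ *
+ * Either output pointer may be NULL when only one value is needed (see
+ * mptcp_set_key_sk() above and mptcp_alloc_mpcb() below).
+ */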
  6580. +
  6581. +void mptcp_hmac_sha1(u8 *key_1, u8 *key_2, u8 *rand_1, u8 *rand_2,
  6582. + u32 *hash_out)
  6583. +{
  6584. + u32 workspace[SHA_WORKSPACE_WORDS];
  6585. + u8 input[128]; /* 2 512-bit blocks */
  6586. + int i;
  6587. +
  6588. + memset(workspace, 0, sizeof(workspace));
  6589. +
  6590. + /* Generate key xored with ipad */
  6591. + memset(input, 0x36, 64);
  6592. + for (i = 0; i < 8; i++)
  6593. + input[i] ^= key_1[i];
  6594. + for (i = 0; i < 8; i++)
  6595. + input[i + 8] ^= key_2[i];
  6596. +
  6597. + memcpy(&input[64], rand_1, 4);
  6598. + memcpy(&input[68], rand_2, 4);
  6599. + input[72] = 0x80; /* Padding: First bit after message = 1 */
  6600. + memset(&input[73], 0, 53);
  6601. +
  6602. + /* Padding: Length of the message = 512 + 64 bits */
  6603. + input[126] = 0x02;
  6604. + input[127] = 0x40;
  6605. +
  6606. + sha_init(hash_out);
  6607. + sha_transform(hash_out, input, workspace);
  6608. + memset(workspace, 0, sizeof(workspace));
  6609. +
  6610. + sha_transform(hash_out, &input[64], workspace);
  6611. + memset(workspace, 0, sizeof(workspace));
  6612. +
  6613. + for (i = 0; i < 5; i++)
  6614. + hash_out[i] = cpu_to_be32(hash_out[i]);
  6615. +
  6616. + /* Prepare second part of hmac */
  6617. + memset(input, 0x5C, 64);
  6618. + for (i = 0; i < 8; i++)
  6619. + input[i] ^= key_1[i];
  6620. + for (i = 0; i < 8; i++)
  6621. + input[i + 8] ^= key_2[i];
  6622. +
  6623. + memcpy(&input[64], hash_out, 20);
  6624. + input[84] = 0x80;
  6625. + memset(&input[85], 0, 41);
  6626. +
  6627. + /* Padding: Length of the message = 512 + 160 bits */
  6628. + input[126] = 0x02;
  6629. + input[127] = 0xA0;
  6630. +
  6631. + sha_init(hash_out);
  6632. + sha_transform(hash_out, input, workspace);
  6633. + memset(workspace, 0, sizeof(workspace));
  6634. +
  6635. + sha_transform(hash_out, &input[64], workspace);
  6636. +
  6637. + for (i = 0; i < 5; i++)
  6638. + hash_out[i] = cpu_to_be32(hash_out[i]);
  6639. +}
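+
+/* Usage sketch (nonce names illustrative): the MP_JOIN handshake
+ * authenticates a new subflow with an HMAC-SHA1 keyed by the two 64-bit
+ * connection keys over the two 32-bit nonces, roughly:
+ *
+ *	u32 hmac[SHA_DIGEST_WORDS];
+ *
+ *	mptcp_hmac_sha1((u8 *)&mpcb->mptcp_loc_key, (u8 *)&mpcb->mptcp_rem_key,
+ *			(u8 *)&local_nonce, (u8 *)&remote_nonce, hmac);
+ *
+ * Each end swaps the key/nonce order according to its role; the callers
+ * live in the join-handling code elsewhere in this patch.
+ */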
  6640. +
  6641. +static void mptcp_mpcb_inherit_sockopts(struct sock *meta_sk, struct sock *master_sk)
  6642. +{
  6643. + /* Socket-options handled by mptcp_inherit_sk while creating the meta-sk.
  6644. + * ======
  6645. + * SO_SNDBUF, SO_SNDBUFFORCE, SO_RCVBUF, SO_RCVBUFFORCE, SO_RCVLOWAT,
  6646. + * SO_RCVTIMEO, SO_SNDTIMEO, SO_ATTACH_FILTER, SO_DETACH_FILTER,
  6647. + * TCP_NODELAY, TCP_CORK
  6648. + *
  6649. + * Socket-options handled in this function here
  6650. + * ======
  6651. + * TCP_DEFER_ACCEPT
  6652. + *
  6653. + * Socket-options on the todo-list
  6654. + * ======
  6655. + * SO_BINDTODEVICE - should probably prevent creation of new subsocks
  6656. + * across other devices. - what about the api-draft?
  6657. + * SO_DEBUG
  6658. + * SO_REUSEADDR - probably we don't care about this
  6659. + * SO_DONTROUTE, SO_BROADCAST
  6660. + * SO_OOBINLINE
  6661. + * SO_LINGER
  6662. + * SO_TIMESTAMP* - I don't think this is of concern for a SOCK_STREAM
  6663. + * SO_PASSSEC - I don't think this is of concern for a SOCK_STREAM
  6664. + * SO_RXQ_OVFL
  6665. + * TCP_COOKIE_TRANSACTIONS
  6666. + * TCP_MAXSEG
  6667. + * TCP_THIN_* - Handled by mptcp_inherit_sk, but we need to support this
  6668. + * in mptcp_retransmit_timer. AND we need to check what is
  6669. + * about the subsockets.
  6670. + * TCP_LINGER2
  6671. + * TCP_WINDOW_CLAMP
  6672. + * TCP_USER_TIMEOUT
  6673. + * TCP_MD5SIG
  6674. + *
  6675. + * Socket-options of no concern for the meta-socket (but for the subsocket)
  6676. + * ======
  6677. + * SO_PRIORITY
  6678. + * SO_MARK
  6679. + * TCP_CONGESTION
  6680. + * TCP_SYNCNT
  6681. + * TCP_QUICKACK
  6682. + * SO_KEEPALIVE
  6683. + */
  6684. +
  6685. + /****** DEFER_ACCEPT-handler ******/
  6686. +
  6687. + /* DEFER_ACCEPT is not of concern for new subflows - we always accept
  6688. + * them
  6689. + */
  6690. + inet_csk(meta_sk)->icsk_accept_queue.rskq_defer_accept = 0;
  6691. +}
  6692. +
  6693. +static void mptcp_sub_inherit_sockopts(struct sock *meta_sk, struct sock *sub_sk)
  6694. +{
  6695. + /* IP_TOS also goes to the subflow. */
  6696. + if (inet_sk(sub_sk)->tos != inet_sk(meta_sk)->tos) {
  6697. + inet_sk(sub_sk)->tos = inet_sk(meta_sk)->tos;
  6698. + sub_sk->sk_priority = meta_sk->sk_priority;
  6699. + sk_dst_reset(sub_sk);
  6700. + }
  6701. +
  6702. + /* Inherit SO_REUSEADDR */
  6703. + sub_sk->sk_reuse = meta_sk->sk_reuse;
  6704. +
  6705. + /* Inherit snd/rcv-buffer locks */
  6706. + sub_sk->sk_userlocks = meta_sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
  6707. +}
  6708. +
  6709. +int mptcp_backlog_rcv(struct sock *meta_sk, struct sk_buff *skb)
  6710. +{
6711. + /* skb->sk may be NULL if we receive a packet immediately after
6712. + * the SYN/ACK + MP_CAPABLE.
  6713. + */
  6714. + struct sock *sk = skb->sk ? skb->sk : meta_sk;
  6715. + int ret = 0;
  6716. +
  6717. + skb->sk = NULL;
  6718. +
  6719. + if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) {
  6720. + kfree_skb(skb);
  6721. + return 0;
  6722. + }
  6723. +
  6724. + if (sk->sk_family == AF_INET)
  6725. + ret = tcp_v4_do_rcv(sk, skb);
  6726. +#if IS_ENABLED(CONFIG_IPV6)
  6727. + else
  6728. + ret = tcp_v6_do_rcv(sk, skb);
  6729. +#endif
  6730. +
  6731. + sock_put(sk);
  6732. + return ret;
  6733. +}
  6734. +
  6735. +struct lock_class_key meta_key;
  6736. +struct lock_class_key meta_slock_key;
  6737. +
6738. +/* Code heavily inspired by sk_clone() */
  6739. +static int mptcp_inherit_sk(const struct sock *sk, struct sock *newsk,
  6740. + int family, const gfp_t flags)
  6741. +{
  6742. + struct sk_filter *filter;
  6743. + struct proto *prot = newsk->sk_prot;
  6744. + const struct inet_connection_sock_af_ops *af_ops = inet_csk(newsk)->icsk_af_ops;
  6745. +#ifdef CONFIG_SECURITY_NETWORK
  6746. + void *sptr = newsk->sk_security;
  6747. +#endif
  6748. +
  6749. + if (sk->sk_family == AF_INET) {
  6750. + memcpy(newsk, sk, offsetof(struct sock, sk_dontcopy_begin));
  6751. + memcpy(&newsk->sk_dontcopy_end, &sk->sk_dontcopy_end,
  6752. + sizeof(struct tcp_sock) - offsetof(struct sock, sk_dontcopy_end));
  6753. + } else {
  6754. + memcpy(newsk, sk, offsetof(struct sock, sk_dontcopy_begin));
  6755. + memcpy(&newsk->sk_dontcopy_end, &sk->sk_dontcopy_end,
  6756. + sizeof(struct tcp6_sock) - offsetof(struct sock, sk_dontcopy_end));
  6757. + }
  6758. +
  6759. +#ifdef CONFIG_SECURITY_NETWORK
  6760. + newsk->sk_security = sptr;
  6761. + security_sk_clone(sk, newsk);
  6762. +#endif
  6763. +
  6764. + /* Has been changed by sock_copy above - we may need an IPv6-socket */
  6765. + newsk->sk_family = family;
  6766. + newsk->sk_prot = prot;
  6767. + newsk->sk_prot_creator = prot;
  6768. + inet_csk(newsk)->icsk_af_ops = af_ops;
  6769. +
6770. + /* We don't yet have the mptcp-pointer. Thus we still need inet_sock_destruct */
  6771. + newsk->sk_destruct = inet_sock_destruct;
  6772. +
  6773. + /* SANITY */
  6774. + get_net(sock_net(newsk));
  6775. + sk_node_init(&newsk->sk_node);
  6776. + sock_lock_init_class_and_name(newsk, "slock-AF_INET-MPTCP",
  6777. + &meta_slock_key, "sk_lock-AF_INET-MPTCP",
  6778. + &meta_key);
  6779. +
  6780. + /* Unlocks are in:
  6781. + *
  6782. + * 1. If we are creating the master-sk
  6783. + * * on client-side in tcp_rcv_state_process, "case TCP_SYN_SENT"
  6784. + * * on server-side in tcp_child_process
  6785. + * 2. If we are creating another subsock
  6786. + * * Also in tcp_child_process
  6787. + */
  6788. + bh_lock_sock(newsk);
  6789. + newsk->sk_backlog.head = NULL;
  6790. + newsk->sk_backlog.tail = NULL;
  6791. + newsk->sk_backlog.len = 0;
  6792. +
  6793. + atomic_set(&newsk->sk_rmem_alloc, 0);
  6794. + atomic_set(&newsk->sk_wmem_alloc, 1);
  6795. + atomic_set(&newsk->sk_omem_alloc, 0);
  6796. +
  6797. + skb_queue_head_init(&newsk->sk_receive_queue);
  6798. + skb_queue_head_init(&newsk->sk_write_queue);
  6799. +#ifdef CONFIG_NET_DMA
  6800. + skb_queue_head_init(&newsk->sk_async_wait_queue);
  6801. +#endif
  6802. +
  6803. + spin_lock_init(&newsk->sk_dst_lock);
  6804. + rwlock_init(&newsk->sk_callback_lock);
  6805. + lockdep_set_class_and_name(&newsk->sk_callback_lock,
  6806. + af_callback_keys + newsk->sk_family,
  6807. + af_family_clock_key_strings[newsk->sk_family]);
  6808. + newsk->sk_dst_cache = NULL;
  6809. + newsk->sk_rx_dst = NULL;
  6810. + newsk->sk_wmem_queued = 0;
  6811. + newsk->sk_forward_alloc = 0;
  6812. + newsk->sk_send_head = NULL;
  6813. + newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
  6814. +
  6815. + tcp_sk(newsk)->mptcp = NULL;
  6816. +
  6817. + sock_reset_flag(newsk, SOCK_DONE);
  6818. + skb_queue_head_init(&newsk->sk_error_queue);
  6819. +
  6820. + filter = rcu_dereference_protected(newsk->sk_filter, 1);
  6821. + if (filter != NULL)
  6822. + sk_filter_charge(newsk, filter);
  6823. +
  6824. + if (unlikely(xfrm_sk_clone_policy(newsk))) {
  6825. + /* It is still raw copy of parent, so invalidate
  6826. + * destructor and make plain sk_free()
  6827. + */
  6828. + newsk->sk_destruct = NULL;
  6829. + bh_unlock_sock(newsk);
  6830. + sk_free(newsk);
  6831. + newsk = NULL;
  6832. + return -ENOMEM;
  6833. + }
  6834. +
  6835. + newsk->sk_err = 0;
  6836. + newsk->sk_priority = 0;
  6837. + /* Before updating sk_refcnt, we must commit prior changes to memory
  6838. + * (Documentation/RCU/rculist_nulls.txt for details)
  6839. + */
  6840. + smp_wmb();
  6841. + atomic_set(&newsk->sk_refcnt, 2);
  6842. +
  6843. + /* Increment the counter in the same struct proto as the master
  6844. + * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
  6845. + * is the same as sk->sk_prot->socks, as this field was copied
  6846. + * with memcpy).
  6847. + *
  6848. + * This _changes_ the previous behaviour, where
6849. + * tcp_create_openreq_child always incremented the
6850. + * equivalent of tcp_prot->socks (inet_sock_nr), so this has
  6851. + * to be taken into account in all callers. -acme
  6852. + */
  6853. + sk_refcnt_debug_inc(newsk);
  6854. + sk_set_socket(newsk, NULL);
  6855. + newsk->sk_wq = NULL;
  6856. +
  6857. + if (newsk->sk_prot->sockets_allocated)
  6858. + percpu_counter_inc(newsk->sk_prot->sockets_allocated);
  6859. +
  6860. + if (sock_flag(newsk, SOCK_TIMESTAMP) ||
  6861. + sock_flag(newsk, SOCK_TIMESTAMPING_RX_SOFTWARE))
  6862. + net_enable_timestamp();
  6863. +
  6864. + return 0;
  6865. +}
  6866. +
  6867. +int mptcp_alloc_mpcb(struct sock *meta_sk, __u64 remote_key, u32 window)
  6868. +{
  6869. + struct mptcp_cb *mpcb;
  6870. + struct sock *master_sk;
  6871. + struct inet_connection_sock *master_icsk, *meta_icsk = inet_csk(meta_sk);
  6872. + struct tcp_sock *master_tp, *meta_tp = tcp_sk(meta_sk);
  6873. + struct sk_buff *skb, *tmp;
  6874. + u64 idsn;
  6875. +
  6876. + master_sk = sk_prot_alloc(meta_sk->sk_prot, GFP_ATOMIC | __GFP_ZERO,
  6877. + meta_sk->sk_family);
  6878. + if (!master_sk)
  6879. + return -ENOBUFS;
  6880. +
  6881. + master_tp = tcp_sk(master_sk);
  6882. + master_icsk = inet_csk(master_sk);
  6883. +
  6884. + /* Need to set this here - it is needed by mptcp_inherit_sk */
  6885. + master_sk->sk_prot = meta_sk->sk_prot;
  6886. + master_sk->sk_prot_creator = meta_sk->sk_prot;
  6887. + master_icsk->icsk_af_ops = meta_icsk->icsk_af_ops;
  6888. +
  6889. + mpcb = kmem_cache_zalloc(mptcp_cb_cache, GFP_ATOMIC);
  6890. + if (!mpcb) {
  6891. + sk_free(master_sk);
  6892. + return -ENOBUFS;
  6893. + }
  6894. +
  6895. + /* master_sk inherits from meta_sk */
  6896. + if (mptcp_inherit_sk(meta_sk, master_sk, meta_sk->sk_family, GFP_ATOMIC)) {
  6897. + kmem_cache_free(mptcp_cb_cache, mpcb);
  6898. + return -ENOBUFS;
  6899. + }
  6900. +
  6901. +#if IS_ENABLED(CONFIG_IPV6)
  6902. + if (meta_icsk->icsk_af_ops == &ipv6_mapped) {
  6903. + struct ipv6_pinfo *newnp, *np = inet6_sk(meta_sk);
  6904. +
  6905. + inet_sk(master_sk)->pinet6 = &((struct tcp6_sock *)master_sk)->inet6;
  6906. +
  6907. + newnp = inet6_sk(master_sk);
  6908. + memcpy(newnp, np, sizeof(struct ipv6_pinfo));
  6909. +
  6910. + newnp->ipv6_mc_list = NULL;
  6911. + newnp->ipv6_ac_list = NULL;
  6912. + newnp->ipv6_fl_list = NULL;
  6913. + newnp->opt = NULL;
  6914. + newnp->pktoptions = NULL;
  6915. + (void)xchg(&newnp->rxpmtu, NULL);
  6916. + } else if (meta_sk->sk_family == AF_INET6) {
  6917. + struct ipv6_pinfo *newnp, *np = inet6_sk(meta_sk);
  6918. +
  6919. + inet_sk(master_sk)->pinet6 = &((struct tcp6_sock *)master_sk)->inet6;
  6920. +
  6921. + newnp = inet6_sk(master_sk);
  6922. + memcpy(newnp, np, sizeof(struct ipv6_pinfo));
  6923. +
  6924. + newnp->hop_limit = -1;
  6925. + newnp->mcast_hops = IPV6_DEFAULT_MCASTHOPS;
  6926. + newnp->mc_loop = 1;
  6927. + newnp->pmtudisc = IPV6_PMTUDISC_WANT;
  6928. + newnp->ipv6only = sock_net(master_sk)->ipv6.sysctl.bindv6only;
  6929. + }
  6930. +#endif
  6931. +
  6932. + meta_tp->mptcp = kmem_cache_zalloc(mptcp_sock_cache, GFP_ATOMIC);
  6933. + if (!meta_tp->mptcp) {
  6934. + kmem_cache_free(mptcp_cb_cache, mpcb);
  6935. + sk_free(master_sk);
  6936. + return -ENOBUFS;
  6937. + }
  6938. +
  6939. + INIT_LIST_HEAD(&meta_tp->mptcp->cb_list);
  6940. +
  6941. + /* Store the keys and generate the peer's token */
  6942. + mpcb->mptcp_loc_key = meta_tp->mptcp_loc_key;
  6943. + mpcb->mptcp_loc_token = meta_tp->mptcp_loc_token;
  6944. +
  6945. + /* Generate Initial data-sequence-numbers */
  6946. + mptcp_key_sha1(mpcb->mptcp_loc_key, NULL, &idsn);
  6947. + idsn = ntohll(idsn) + 1;
  6948. + mpcb->snd_high_order[0] = idsn >> 32;
  6949. + mpcb->snd_high_order[1] = mpcb->snd_high_order[0] - 1;
  6950. +
  6951. + meta_tp->write_seq = (u32)idsn;
  6952. + meta_tp->snd_sml = meta_tp->write_seq;
  6953. + meta_tp->snd_una = meta_tp->write_seq;
  6954. + meta_tp->snd_nxt = meta_tp->write_seq;
  6955. + meta_tp->pushed_seq = meta_tp->write_seq;
  6956. + meta_tp->snd_up = meta_tp->write_seq;
  6957. +
  6958. + mpcb->mptcp_rem_key = remote_key;
  6959. + mptcp_key_sha1(mpcb->mptcp_rem_key, &mpcb->mptcp_rem_token, &idsn);
  6960. + idsn = ntohll(idsn) + 1;
  6961. + mpcb->rcv_high_order[0] = idsn >> 32;
  6962. + mpcb->rcv_high_order[1] = mpcb->rcv_high_order[0] + 1;
  6963. + meta_tp->copied_seq = (u32) idsn;
  6964. + meta_tp->rcv_nxt = (u32) idsn;
  6965. + meta_tp->rcv_wup = (u32) idsn;
  6966. +
  6967. + meta_tp->snd_wl1 = meta_tp->rcv_nxt - 1;
  6968. + meta_tp->snd_wnd = window;
  6969. + meta_tp->retrans_stamp = 0; /* Set in tcp_connect() */
  6970. +
  6971. + meta_tp->packets_out = 0;
  6972. + meta_tp->mptcp->snt_isn = meta_tp->write_seq; /* Initial data-sequence-number */
  6973. + meta_icsk->icsk_probes_out = 0;
  6974. +
  6975. + /* Set mptcp-pointers */
  6976. + master_tp->mpcb = mpcb;
  6977. + master_tp->meta_sk = meta_sk;
  6978. + meta_tp->mpcb = mpcb;
  6979. + meta_tp->meta_sk = meta_sk;
  6980. + mpcb->meta_sk = meta_sk;
  6981. + mpcb->master_sk = master_sk;
  6982. +
  6983. + set_mpc(meta_tp);
  6984. + meta_tp->mptcp->attached = 0;
  6985. + meta_tp->was_meta_sk = 0;
  6986. +
  6987. + /* Initialize the queues */
  6988. + skb_queue_head_init(&mpcb->reinject_queue);
  6989. + skb_queue_head_init(&master_tp->out_of_order_queue);
  6990. + tcp_prequeue_init(master_tp);
  6991. + INIT_LIST_HEAD(&master_tp->tsq_node);
  6992. +
  6993. + master_tp->tsq_flags = 0;
  6994. +
  6995. + /* Copy the write-queue from the meta down to the master.
  6996. + * This is necessary to get the SYN to the master-write-queue.
6997. + * No other data can be queued yet, because tcp_sendmsg waits for
6998. + * the connection to complete first.
  6999. + */
  7000. + skb_queue_walk_safe(&meta_sk->sk_write_queue, skb, tmp) {
  7001. + skb_unlink(skb, &meta_sk->sk_write_queue);
  7002. + skb_queue_tail(&master_sk->sk_write_queue, skb);
  7003. +
  7004. + master_sk->sk_wmem_queued += skb->truesize;
  7005. + sk_mem_charge(master_sk, skb->truesize);
  7006. + }
  7007. +
  7008. + meta_sk->sk_wmem_queued = 0;
  7009. + meta_sk->sk_forward_alloc = 0;
  7010. +
  7011. + mutex_init(&mpcb->mpcb_mutex);
  7012. +
7013. + /* Init the accept_queue structure. We support a queue of 32 pending
7014. + * connections; it does not need to be huge, since we only store
7015. + * pending subflow creations here.
  7016. + */
  7017. + if (reqsk_queue_alloc(&meta_icsk->icsk_accept_queue, 32, GFP_ATOMIC)) {
  7018. + inet_put_port(master_sk);
  7019. + kmem_cache_free(mptcp_sock_cache, meta_tp->mptcp);
  7020. + kmem_cache_free(mptcp_cb_cache, mpcb);
  7021. + sk_free(master_sk);
  7022. + reset_mpc(meta_tp);
  7023. + return -ENOMEM;
  7024. + }
  7025. +
  7026. + /* Redefine function-pointers as the meta-sk is now fully ready */
  7027. + meta_sk->sk_backlog_rcv = mptcp_backlog_rcv;
  7028. + meta_sk->sk_destruct = mptcp_sock_destruct;
  7029. + mpcb->syn_recv_sock = mptcp_syn_recv_sock;
  7030. +
  7031. + /* Meta-level retransmit timer */
  7032. + meta_icsk->icsk_rto *= 2; /* Double the initial RTO */
  7033. +
  7034. + tcp_init_xmit_timers(master_sk);
  7035. + /* Has been set for sending out the SYN */
  7036. + inet_csk_clear_xmit_timer(meta_sk, ICSK_TIME_RETRANS);
  7037. +
  7038. + if (!meta_tp->inside_tk_table) {
  7039. + /* Adding the meta_tp in the token hashtable - coming from server-side */
  7040. + rcu_read_lock();
  7041. + spin_lock(&mptcp_tk_hashlock);
  7042. +
  7043. + __mptcp_hash_insert(meta_tp, mpcb->mptcp_loc_token);
  7044. +
  7045. + spin_unlock(&mptcp_tk_hashlock);
  7046. + rcu_read_unlock();
  7047. + }
  7048. + master_tp->inside_tk_table = 0;
  7049. +
  7050. + /* Init time-wait stuff */
  7051. + INIT_LIST_HEAD(&mpcb->tw_list);
  7052. + spin_lock_init(&mpcb->tw_lock);
  7053. +
  7054. + INIT_LIST_HEAD(&mpcb->callback_list);
  7055. +
  7056. + mptcp_mpcb_inherit_sockopts(meta_sk, master_sk);
  7057. +
  7058. + mpcb->orig_sk_rcvbuf = meta_sk->sk_rcvbuf;
  7059. + mpcb->orig_sk_sndbuf = meta_sk->sk_sndbuf;
  7060. + mpcb->orig_window_clamp = meta_tp->window_clamp;
  7061. +
  7062. + /* The meta is directly linked - set refcnt to 1 */
  7063. + atomic_set(&mpcb->mpcb_refcnt, 1);
  7064. +
  7065. + mptcp_init_path_manager(mpcb);
  7066. +
  7067. + mptcp_debug("%s: created mpcb with token %#x\n",
  7068. + __func__, mpcb->mptcp_loc_token);
  7069. +
  7070. + return 0;
  7071. +}
  7072. +
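+/* Allocate a new subflow socket of the requested address family and let it
+ * inherit its state from the given (meta) socket via mptcp_inherit_sk().
+ */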
  7073. +struct sock *mptcp_sk_clone(const struct sock *sk, int family,
  7074. + const gfp_t priority)
  7075. +{
  7076. + struct sock *newsk = NULL;
  7077. +
  7078. + if (family == AF_INET && sk->sk_family == AF_INET) {
  7079. + newsk = sk_prot_alloc(&tcp_prot, priority, family);
  7080. + if (!newsk)
  7081. + return NULL;
  7082. +
  7083. + /* Set these pointers - they are needed by mptcp_inherit_sk */
  7084. + newsk->sk_prot = &tcp_prot;
  7085. + newsk->sk_prot_creator = &tcp_prot;
  7086. + inet_csk(newsk)->icsk_af_ops = &ipv4_specific;
  7087. + newsk->sk_family = AF_INET;
  7088. + }
  7089. +#if IS_ENABLED(CONFIG_IPV6)
  7090. + else {
  7091. + newsk = sk_prot_alloc(&tcpv6_prot, priority, family);
  7092. + if (!newsk)
  7093. + return NULL;
  7094. +
  7095. + newsk->sk_prot = &tcpv6_prot;
  7096. + newsk->sk_prot_creator = &tcpv6_prot;
  7097. + if (family == AF_INET)
  7098. + inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
  7099. + else
  7100. + inet_csk(newsk)->icsk_af_ops = &ipv6_specific;
  7101. + newsk->sk_family = AF_INET6;
  7102. + }
  7103. +#endif
  7104. +
  7105. + if (mptcp_inherit_sk(sk, newsk, family, priority))
  7106. + return NULL;
  7107. +
  7108. + return newsk;
  7109. +}
  7110. +
  7111. +void mptcp_fallback_meta_sk(struct sock *meta_sk)
  7112. +{
  7113. + kfree(inet_csk(meta_sk)->icsk_accept_queue.listen_opt);
  7114. + kmem_cache_free(mptcp_sock_cache, tcp_sk(meta_sk)->mptcp);
  7115. + kmem_cache_free(mptcp_cb_cache, tcp_sk(meta_sk)->mpcb);
  7116. +}
  7117. +
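+/* Attach a subflow to the meta-socket: allocate the mptcp_tcp_sock, assign a
+ * path-index, link the subflow into the mpcb's connection_list and redirect
+ * its socket-callbacks to the MPTCP handlers.
+ */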
  7118. +int mptcp_add_sock(struct sock *meta_sk, struct sock *sk, u8 loc_id, u8 rem_id,
  7119. + gfp_t flags)
  7120. +{
  7121. + struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
  7122. + struct tcp_sock *tp = tcp_sk(sk);
  7123. +
  7124. + tp->mptcp = kmem_cache_zalloc(mptcp_sock_cache, flags);
  7125. + if (!tp->mptcp)
  7126. + return -ENOMEM;
  7127. +
  7128. + tp->mptcp->path_index = mptcp_set_new_pathindex(mpcb);
  7129. + /* No more space for more subflows? */
  7130. + if (!tp->mptcp->path_index) {
  7131. + kmem_cache_free(mptcp_sock_cache, tp->mptcp);
  7132. + return -EPERM;
  7133. + }
  7134. +
  7135. + INIT_LIST_HEAD(&tp->mptcp->cb_list);
  7136. +
  7137. + tp->mptcp->tp = tp;
  7138. + tp->mpcb = mpcb;
  7139. + tp->meta_sk = meta_sk;
  7140. + set_mpc(tp);
  7141. + tp->mptcp->loc_id = loc_id;
  7142. + tp->mptcp->rem_id = rem_id;
  7143. + tp->mptcp->last_rbuf_opti = tcp_time_stamp;
  7144. +
  7145. + /* The corresponding sock_put is in mptcp_sock_destruct(). It cannot be
  7146. + * included in mptcp_del_sock(), because the mpcb must remain alive
  7147. + * until the last subsocket is completely destroyed.
  7148. + */
  7149. + sock_hold(meta_sk);
  7150. + atomic_inc(&mpcb->mpcb_refcnt);
  7151. +
  7152. + tp->mptcp->next = mpcb->connection_list;
  7153. + mpcb->connection_list = tp;
  7154. + tp->mptcp->attached = 1;
  7155. +
  7156. + mpcb->cnt_subflows++;
  7157. + atomic_add(atomic_read(&((struct sock *)tp)->sk_rmem_alloc),
  7158. + &meta_sk->sk_rmem_alloc);
  7159. +
  7160. + mptcp_sub_inherit_sockopts(meta_sk, sk);
  7161. + INIT_DELAYED_WORK(&tp->mptcp->work, mptcp_sub_close_wq);
  7162. +
  7163. + /* As we successfully allocated the mptcp_tcp_sock, we have to
  7164. + * change the function-pointers here (for sk_destruct to work correctly)
  7165. + */
  7166. + sk->sk_error_report = mptcp_sock_def_error_report;
  7167. + sk->sk_data_ready = mptcp_data_ready;
  7168. + sk->sk_write_space = mptcp_write_space;
  7169. + sk->sk_state_change = mptcp_set_state;
  7170. + sk->sk_destruct = mptcp_sock_destruct;
  7171. +
  7172. + if (sk->sk_family == AF_INET)
  7173. + mptcp_debug("%s: token %#x pi %d, src_addr:%pI4:%d dst_addr:%pI4:%d, cnt_subflows now %d\n",
  7174. + __func__ , mpcb->mptcp_loc_token,
  7175. + tp->mptcp->path_index,
  7176. + &((struct inet_sock *)tp)->inet_saddr,
  7177. + ntohs(((struct inet_sock *)tp)->inet_sport),
  7178. + &((struct inet_sock *)tp)->inet_daddr,
  7179. + ntohs(((struct inet_sock *)tp)->inet_dport),
  7180. + mpcb->cnt_subflows);
  7181. +#if IS_ENABLED(CONFIG_IPV6)
  7182. + else
  7183. + mptcp_debug("%s: token %#x pi %d, src_addr:%pI6:%d dst_addr:%pI6:%d, cnt_subflows now %d\n",
  7184. + __func__ , mpcb->mptcp_loc_token,
  7185. + tp->mptcp->path_index, &inet6_sk(sk)->saddr,
  7186. + ntohs(((struct inet_sock *)tp)->inet_sport),
  7187. + &sk->sk_v6_daddr,
  7188. + ntohs(((struct inet_sock *)tp)->inet_dport),
  7189. + mpcb->cnt_subflows);
  7190. +#endif
  7191. +
  7192. + return 0;
  7193. +}
  7194. +
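+/* Detach a subflow from the mpcb: unlink it from the connection_list, release
+ * its path-index bit and reinject any unsent data at the meta-level.
+ */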
  7195. +void mptcp_del_sock(struct sock *sk)
  7196. +{
  7197. + struct tcp_sock *tp = tcp_sk(sk), *tp_prev;
  7198. + struct mptcp_cb *mpcb;
  7199. +
  7200. + if (!tp->mptcp || !tp->mptcp->attached)
  7201. + return;
  7202. +
  7203. + mpcb = tp->mpcb;
  7204. + tp_prev = mpcb->connection_list;
  7205. +
  7206. + mptcp_debug("%s: Removing subsock tok %#x pi:%d state %d is_meta? %d\n",
  7207. + __func__, mpcb->mptcp_loc_token, tp->mptcp->path_index,
  7208. + sk->sk_state, is_meta_sk(sk));
  7209. +
  7210. + if (tp_prev == tp) {
  7211. + mpcb->connection_list = tp->mptcp->next;
  7212. + } else {
  7213. + for (; tp_prev && tp_prev->mptcp->next; tp_prev = tp_prev->mptcp->next) {
  7214. + if (tp_prev->mptcp->next == tp) {
  7215. + tp_prev->mptcp->next = tp->mptcp->next;
  7216. + break;
  7217. + }
  7218. + }
  7219. + }
  7220. + mpcb->cnt_subflows--;
  7221. + if (tp->mptcp->establish_increased)
  7222. + mpcb->cnt_established--;
  7223. +
  7224. + tp->mptcp->next = NULL;
  7225. + tp->mptcp->attached = 0;
  7226. + mpcb->path_index_bits &= ~(1 << tp->mptcp->path_index);
  7227. +
  7228. + if (!skb_queue_empty(&sk->sk_write_queue))
  7229. + mptcp_reinject_data(sk, 0);
  7230. +
  7231. + if (is_master_tp(tp))
  7232. + mpcb->master_sk = NULL;
  7233. + else if (tp->mptcp->pre_established)
  7234. + sk_stop_timer(sk, &tp->mptcp->mptcp_ack_timer);
  7235. +
  7236. + rcu_assign_pointer(inet_sk(sk)->inet_opt, NULL);
  7237. +}
  7238. +
  7239. +/* Updates the metasocket ULID/port data, based on the given sock.
  7240. + * The argument sock must be the sock accessible to the application.
  7241. + * In this function, we update the meta socket info, based on the changes
  7242. + * in the application socket (bind, address allocation, ...)
  7243. + */
  7244. +void mptcp_update_metasocket(struct sock *sk, struct sock *meta_sk)
  7245. +{
  7246. + struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
  7247. + union inet_addr addr;
  7248. + int index;
  7249. +
  7250. + /* Get the index of the local address */
  7251. + if (sk->sk_family == AF_INET || mptcp_v6_is_v4_mapped(sk)) {
  7252. + addr.ip = inet_sk(sk)->inet_saddr;
  7253. + index = mpcb->pm_ops->get_local_index(AF_INET, &addr, sock_net(meta_sk));
  7254. + } else {
  7255. + addr.in6 = inet6_sk(sk)->saddr;
  7256. + index = mpcb->pm_ops->get_local_index(AF_INET6, &addr, sock_net(meta_sk));
  7257. + }
  7258. +
  7259. + if (sk->sk_family == AF_INET || mptcp_v6_is_v4_mapped(sk)) {
  7260. + mptcp_v4_add_raddress(mpcb,
  7261. + (struct in_addr *)&inet_sk(sk)->inet_daddr,
  7262. + 0, 0);
  7263. + if (index >= 0)
  7264. + mptcp_v4_set_init_addr_bit(mpcb, inet_sk(sk)->inet_daddr, index);
  7265. + } else {
  7266. +#if IS_ENABLED(CONFIG_IPV6)
  7267. + mptcp_v6_add_raddress(mpcb, &sk->sk_v6_daddr, 0, 0);
  7268. + if (index >= 0)
  7269. + mptcp_v6_set_init_addr_bit(mpcb, &sk->sk_v6_daddr, index);
  7270. +#endif
  7271. + }
  7272. +
  7273. + if (mpcb->pm_ops->new_session)
  7274. + mpcb->pm_ops->new_session(meta_sk, index);
  7275. +
  7276. + tcp_sk(sk)->mptcp->send_mp_prio = tcp_sk(sk)->mptcp->low_prio;
  7277. +}
  7278. +
  7279. +/* Clean up the receive buffer for full frames taken by the user,
  7280. + * then send an ACK if necessary. COPIED is the number of bytes
  7281. + * tcp_recvmsg has given to the user so far, it speeds up the
  7282. + * calculation of whether or not we must ACK for the sake of
  7283. + * a window update.
  7284. + */
  7285. +void mptcp_cleanup_rbuf(struct sock *meta_sk, int copied)
  7286. +{
  7287. + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
  7288. + struct sock *sk;
  7289. + __u32 rcv_window_now = 0;
  7290. +
  7291. + if (copied > 0 && !(meta_sk->sk_shutdown & RCV_SHUTDOWN)) {
  7292. + rcv_window_now = tcp_receive_window(meta_tp);
  7293. +
  7294. + if (2 * rcv_window_now > meta_tp->window_clamp)
  7295. + rcv_window_now = 0;
  7296. + }
  7297. +
  7298. + mptcp_for_each_sk(meta_tp->mpcb, sk) {
  7299. + struct tcp_sock *tp = tcp_sk(sk);
  7300. + const struct inet_connection_sock *icsk = inet_csk(sk);
  7301. +
  7302. + if (!mptcp_sk_can_send_ack(sk))
  7303. + continue;
  7304. +
  7305. + if (!inet_csk_ack_scheduled(sk))
  7306. + goto second_part;
  7307. + /* Delayed ACKs frequently hit locked sockets during bulk
  7308. + * receive.
  7309. + */
  7310. + if (icsk->icsk_ack.blocked ||
  7311. + /* Once-per-two-segments ACK was not sent by tcp_input.c */
  7312. + tp->rcv_nxt - tp->rcv_wup > icsk->icsk_ack.rcv_mss ||
  7313. + /* If this read emptied read buffer, we send ACK, if
  7314. + * connection is not bidirectional, user drained
  7315. + * receive buffer and there was a small segment
  7316. + * in queue.
  7317. + */
  7318. + (copied > 0 &&
  7319. + ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) ||
  7320. + ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) &&
  7321. + !icsk->icsk_ack.pingpong)) &&
  7322. + !atomic_read(&meta_sk->sk_rmem_alloc))) {
  7323. + tcp_send_ack(sk);
  7324. + continue;
  7325. + }
  7326. +
  7327. +second_part:
  7328. + /* This here is the second part of tcp_cleanup_rbuf */
  7329. + if (rcv_window_now) {
  7330. + __u32 new_window = tp->__select_window(sk);
  7331. +
  7332. + /* Send ACK now, if this read freed lots of space
  7333. + * in our buffer. new_window is the window we can
  7334. + * advertise now, provided it is not less than the
  7335. + * current one.
  7336. + * "Lots" means "at least twice" here.
  7337. + */
  7338. + if (new_window && new_window >= 2 * rcv_window_now)
  7339. + tcp_send_ack(sk);
  7340. + }
  7341. + }
  7342. +}
  7343. +
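+/* Subflow-level variant of tcp_send_fin(): piggy-back the FIN on the last
+ * queued skb if possible, otherwise allocate a new one. Returns 1 if the
+ * allocation failed.
+ */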
  7344. +static int mptcp_sub_send_fin(struct sock *sk)
  7345. +{
  7346. + struct tcp_sock *tp = tcp_sk(sk);
  7347. + struct sk_buff *skb = tcp_write_queue_tail(sk);
  7348. + int mss_now;
  7349. +
  7350. + /* Optimization, tack on the FIN if we have a queue of
  7351. + * unsent frames. But be careful about outgoing SACKS
  7352. + * and IP options.
  7353. + */
  7354. + mss_now = tcp_current_mss(sk);
  7355. +
  7356. + if (tcp_send_head(sk) != NULL) {
  7357. + TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN;
  7358. + TCP_SKB_CB(skb)->end_seq++;
  7359. + tp->write_seq++;
  7360. + } else {
  7361. + skb = alloc_skb_fclone(MAX_TCP_HEADER, GFP_ATOMIC);
  7362. + if (!skb)
  7363. + return 1;
  7364. +
  7365. + /* Reserve space for headers and prepare control bits. */
  7366. + skb_reserve(skb, MAX_TCP_HEADER);
  7367. + /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
  7368. + tcp_init_nondata_skb(skb, tp->write_seq,
  7369. + TCPHDR_ACK | TCPHDR_FIN);
  7370. + tcp_queue_skb(sk, skb);
  7371. + }
  7372. + __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
  7373. +
  7374. + return 0;
  7375. +}
  7376. +
  7377. +void mptcp_sub_close_wq(struct work_struct *work)
  7378. +{
  7379. + struct mptcp_tcp_sock *mptcp = container_of(work, struct mptcp_tcp_sock, work.work);
  7380. + struct tcp_sock *tp = mptcp->tp;
  7381. + struct sock *sk = (struct sock *)tp;
  7382. + struct sock *meta_sk = mptcp_meta_sk(sk);
  7383. +
  7384. + mutex_lock(&tp->mpcb->mpcb_mutex);
  7385. + lock_sock_nested(meta_sk, SINGLE_DEPTH_NESTING);
  7386. +
  7387. + if (sock_flag(sk, SOCK_DEAD))
  7388. + goto exit;
  7389. +
  7390. + /* We come from tcp_disconnect. We are sure that meta_sk is set */
  7391. + if (!tp->mpc) {
  7392. + tp->closing = 1;
  7393. + sock_rps_reset_flow(sk);
  7394. + tcp_close(sk, 0);
  7395. + goto exit;
  7396. + }
  7397. +
  7398. + if (meta_sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == TCP_CLOSE) {
  7399. + tp->closing = 1;
  7400. + sock_rps_reset_flow(sk);
  7401. + tcp_close(sk, 0);
  7402. + } else if (tcp_close_state(sk)) {
  7403. + sk->sk_shutdown |= SEND_SHUTDOWN;
  7404. + tcp_send_fin(sk);
  7405. + }
  7406. +
  7407. +exit:
  7408. + release_sock(meta_sk);
  7409. + mutex_unlock(&tp->mpcb->mpcb_mutex);
  7410. + sock_put(sk);
  7411. +}
  7412. +
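+/* Close a single subflow, either directly (no delay, user-context) or
+ * deferred to the mptcp work-queue.
+ */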
  7413. +void mptcp_sub_close(struct sock *sk, unsigned long delay)
  7414. +{
  7415. + struct tcp_sock *tp = tcp_sk(sk);
  7416. + struct delayed_work *work = &tcp_sk(sk)->mptcp->work;
  7417. +
  7418. + /* We are already closing - e.g., called from sock_def_error_report upon
  7419. + * tcp_disconnect in tcp_close.
  7420. + */
  7421. + if (tp->closing)
  7422. + return;
  7423. +
  7424. + /* Work already scheduled? */
  7425. + if (work_pending(&work->work)) {
  7426. + /* Work present - who will be first? */
  7427. + if (jiffies + delay > work->timer.expires)
  7428. + return;
  7429. +
  7430. + /* Try canceling - if it fails, work will be executed soon */
  7431. + if (!cancel_delayed_work(work))
  7432. + return;
  7433. + sock_put(sk);
  7434. + }
  7435. +
  7436. + if (!delay) {
  7437. + unsigned char old_state = sk->sk_state;
  7438. +
  7439. + /* If we are in user-context we can directly do the closing
  7440. + * procedure. No need to schedule a work-queue.
  7441. + */
  7442. + if (!in_softirq()) {
  7443. + if (sock_flag(sk, SOCK_DEAD))
  7444. + return;
  7445. +
  7446. + if (!tp->mpc) {
  7447. + tp->closing = 1;
  7448. + sock_rps_reset_flow(sk);
  7449. + tcp_close(sk, 0);
  7450. + return;
  7451. + }
  7452. +
  7453. + if (mptcp_meta_sk(sk)->sk_shutdown == SHUTDOWN_MASK ||
  7454. + sk->sk_state == TCP_CLOSE) {
  7455. + tp->closing = 1;
  7456. + sock_rps_reset_flow(sk);
  7457. + tcp_close(sk, 0);
  7458. + } else if (tcp_close_state(sk)) {
  7459. + sk->sk_shutdown |= SEND_SHUTDOWN;
  7460. + tcp_send_fin(sk);
  7461. + }
  7462. +
  7463. + return;
  7464. + }
  7465. +
  7466. + /* We send the FIN directly here, because it may take a long time
  7467. + * until the work-queue gets scheduled...
  7468. + *
  7469. + * If mptcp_sub_send_fin returns 1, it failed and thus we restore
  7470. + * the old state so that tcp_close will eventually send the FIN
  7471. + * in user-context.
  7472. + */
  7473. + if (!sk->sk_err && old_state != TCP_CLOSE &&
  7474. + tcp_close_state(sk) && mptcp_sub_send_fin(sk)) {
  7475. + if (old_state == TCP_ESTABLISHED)
  7476. + TCP_INC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
  7477. + sk->sk_state = old_state;
  7478. + }
  7479. + }
  7480. +
  7481. + sock_hold(sk);
  7482. + queue_delayed_work(mptcp_wq, work, delay);
  7483. +}
  7484. +
  7485. +void mptcp_sub_force_close(struct sock *sk)
  7486. +{
  7487. + /* The below tcp_done may have freed the socket, if it is already dead.
  7488. + * Thus, we are not allowed to access it afterwards. That's why
  7489. + * we have to store the dead-state in this local variable.
  7490. + */
  7491. + int sock_is_dead = sock_flag(sk, SOCK_DEAD);
  7492. +
  7493. + tcp_sk(sk)->mp_killed = 1;
  7494. +
  7495. + if (sk->sk_state != TCP_CLOSE)
  7496. + tcp_done(sk);
  7497. +
  7498. + if (!sock_is_dead)
  7499. + mptcp_sub_close(sk, 0);
  7500. +}
  7501. +EXPORT_SYMBOL(mptcp_sub_force_close);
  7502. +
  7503. +/* Update the meta-socket's send buffer, based on the contributions
  7504. + * of each subflow
  7505. + */
  7506. +void mptcp_update_sndbuf(struct mptcp_cb *mpcb)
  7507. +{
  7508. + struct sock *meta_sk = mpcb->meta_sk, *sk;
  7509. + int new_sndbuf = 0, old_sndbuf = meta_sk->sk_sndbuf;
  7510. + mptcp_for_each_sk(mpcb, sk) {
  7511. + if (!mptcp_sk_can_send(sk))
  7512. + continue;
  7513. +
  7514. + new_sndbuf += sk->sk_sndbuf;
  7515. +
  7516. + if (new_sndbuf > sysctl_tcp_wmem[2] || new_sndbuf < 0) {
  7517. + new_sndbuf = sysctl_tcp_wmem[2];
  7518. + break;
  7519. + }
  7520. + }
  7521. + meta_sk->sk_sndbuf = max(min(new_sndbuf, sysctl_tcp_wmem[2]), meta_sk->sk_sndbuf);
  7522. +
  7523. + /* The subflow's call to sk_write_space in tcp_new_space ends up in
  7524. + * mptcp_write_space.
  7525. + * It has nothing to do with waking up the application.
  7526. + * So, we do it here.
  7527. + */
  7528. + if (old_sndbuf != meta_sk->sk_sndbuf)
  7529. + meta_sk->sk_write_space(meta_sk);
  7530. +}
  7531. +
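+/* Meta-level counterpart of tcp_close(): flush the receive-queue, send a
+ * DATA_FIN or a reset, close the remaining subflows and orphan the meta-sk.
+ */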
  7532. +void mptcp_close(struct sock *meta_sk, long timeout)
  7533. +{
  7534. + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
  7535. + struct sock *sk_it, *tmpsk;
  7536. + struct mptcp_cb *mpcb = meta_tp->mpcb;
  7537. + struct sk_buff *skb;
  7538. + int data_was_unread = 0;
  7539. + int state;
  7540. +
  7541. + mptcp_debug("%s: Close of meta_sk with tok %#x\n",
  7542. + __func__, mpcb->mptcp_loc_token);
  7543. +
  7544. + mutex_lock(&mpcb->mpcb_mutex);
  7545. + lock_sock(meta_sk);
  7546. +
  7547. + if (meta_tp->inside_tk_table) {
  7548. + /* Detach the mpcb from the token hashtable */
  7549. + mptcp_hash_remove_bh(meta_tp);
  7550. + reqsk_queue_destroy(&inet_csk(meta_sk)->icsk_accept_queue);
  7551. + }
  7552. +
  7553. + meta_sk->sk_shutdown = SHUTDOWN_MASK;
  7554. + /* We need to flush the recv. buffs. We do this only on the
  7555. + * descriptor close, not protocol-sourced closes, because the
  7556. + * reader process may not have drained the data yet!
  7557. + */
  7558. + while ((skb = __skb_dequeue(&meta_sk->sk_receive_queue)) != NULL) {
  7559. + u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq -
  7560. + tcp_hdr(skb)->fin;
  7561. + data_was_unread += len;
  7562. + __kfree_skb(skb);
  7563. + }
  7564. +
  7565. + sk_mem_reclaim(meta_sk);
  7566. +
  7567. + /* If socket has been already reset (e.g. in tcp_reset()) - kill it. */
  7568. + if (meta_sk->sk_state == TCP_CLOSE) {
  7569. + mptcp_for_each_sk_safe(mpcb, sk_it, tmpsk) {
  7570. + if (tcp_sk(sk_it)->send_mp_fclose)
  7571. + continue;
  7572. + mptcp_sub_close(sk_it, 0);
  7573. + }
  7574. + goto adjudge_to_death;
  7575. + }
  7576. +
  7577. + if (data_was_unread) {
  7578. + /* Unread data was tossed, zap the connection. */
  7579. + NET_INC_STATS_USER(sock_net(meta_sk), LINUX_MIB_TCPABORTONCLOSE);
  7580. + tcp_set_state(meta_sk, TCP_CLOSE);
  7581. + tcp_send_active_reset(meta_sk, meta_sk->sk_allocation);
  7582. + } else if (sock_flag(meta_sk, SOCK_LINGER) && !meta_sk->sk_lingertime) {
  7583. + /* Check zero linger _after_ checking for unread data. */
  7584. + meta_sk->sk_prot->disconnect(meta_sk, 0);
  7585. + NET_INC_STATS_USER(sock_net(meta_sk), LINUX_MIB_TCPABORTONDATA);
  7586. + } else if (tcp_close_state(meta_sk)) {
  7587. + mptcp_send_fin(meta_sk);
  7588. + } else if (meta_tp->snd_una == meta_tp->write_seq) {
  7589. + /* The DATA_FIN has been sent and acknowledged
  7590. + * (e.g., by sk_shutdown). Close all the other subflows
  7591. + */
  7592. + mptcp_for_each_sk_safe(mpcb, sk_it, tmpsk) {
  7593. + unsigned long delay = 0;
  7594. + /* If we are the passive closer, don't trigger
  7595. + * the subflow-FIN until the subflow has received
  7596. + * a FIN from the peer - thus we add a delay.
  7597. + */
  7598. + if (mpcb->passive_close &&
  7599. + sk_it->sk_state == TCP_ESTABLISHED)
  7600. + delay = inet_csk(sk_it)->icsk_rto << 3;
  7601. +
  7602. + mptcp_sub_close(sk_it, delay);
  7603. + }
  7604. + }
  7605. +
  7606. + sk_stream_wait_close(meta_sk, timeout);
  7607. +
  7608. +adjudge_to_death:
  7609. + state = meta_sk->sk_state;
  7610. + sock_hold(meta_sk);
  7611. + sock_orphan(meta_sk);
  7612. +
  7613. + /* socket will be freed after mptcp_close - we have to prevent
  7614. + * access from the subflows.
  7615. + */
  7616. + mptcp_for_each_sk(mpcb, sk_it) {
  7617. + /* Similar to sock_orphan, but we don't set it DEAD, because
  7618. + * the callbacks are still set and must be called.
  7619. + */
  7620. + write_lock_bh(&sk_it->sk_callback_lock);
  7621. + sk_set_socket(sk_it, NULL);
  7622. + sk_it->sk_wq = NULL;
  7623. + write_unlock_bh(&sk_it->sk_callback_lock);
  7624. + }
  7625. +
  7626. + /* It is the last release_sock in its life. It will remove backlog. */
  7627. + release_sock(meta_sk);
  7628. +
  7629. + /* Now socket is owned by kernel and we acquire BH lock
  7630. + * to finish close. No need to check for user refs.
  7631. + */
  7632. + local_bh_disable();
  7633. + bh_lock_sock(meta_sk);
  7634. + WARN_ON(sock_owned_by_user(meta_sk));
  7635. +
  7636. + percpu_counter_inc(meta_sk->sk_prot->orphan_count);
  7637. +
  7638. + /* Have we already been destroyed by a softirq or backlog? */
  7639. + if (state != TCP_CLOSE && meta_sk->sk_state == TCP_CLOSE)
  7640. + goto out;
  7641. +
  7642. + /* This is a (useful) BSD violation of the RFC. There is a
  7643. + * problem with TCP as specified in that the other end could
  7644. + * keep a socket open forever with no application left this end.
  7645. + * We use a 3 minute timeout (about the same as BSD) then kill
  7646. + * our end. If they send after that then tough - BUT: long enough
  7647. + * that we won't make the old 4*rto = almost no time - whoops
  7648. + * reset mistake.
  7649. + *
  7650. + * Nope, it was not mistake. It is really desired behaviour
  7651. + * f.e. on http servers, when such sockets are useless, but
  7652. + * consume significant resources. Let's do it with special
  7653. + * linger2 option. --ANK
  7654. + */
  7655. +
  7656. + if (meta_sk->sk_state == TCP_FIN_WAIT2) {
  7657. + if (meta_tp->linger2 < 0) {
  7658. + tcp_set_state(meta_sk, TCP_CLOSE);
  7659. + tcp_send_active_reset(meta_sk, GFP_ATOMIC);
  7660. + NET_INC_STATS_BH(sock_net(meta_sk),
  7661. + LINUX_MIB_TCPABORTONLINGER);
  7662. + } else {
  7663. + const int tmo = tcp_fin_time(meta_sk);
  7664. +
  7665. + if (tmo > TCP_TIMEWAIT_LEN) {
  7666. + inet_csk_reset_keepalive_timer(meta_sk,
  7667. + tmo - TCP_TIMEWAIT_LEN);
  7668. + } else {
  7669. + tcp_time_wait(meta_sk, TCP_FIN_WAIT2, tmo);
  7670. + goto out;
  7671. + }
  7672. + }
  7673. + }
  7674. + if (meta_sk->sk_state != TCP_CLOSE) {
  7675. + sk_mem_reclaim(meta_sk);
  7676. + if (tcp_too_many_orphans(meta_sk, 0)) {
  7677. + if (net_ratelimit())
  7678. + pr_info("MPTCP: too many orphaned sockets\n");
  7679. + tcp_set_state(meta_sk, TCP_CLOSE);
  7680. + tcp_send_active_reset(meta_sk, GFP_ATOMIC);
  7681. + NET_INC_STATS_BH(sock_net(meta_sk),
  7682. + LINUX_MIB_TCPABORTONMEMORY);
  7683. + }
  7684. + }
  7685. +
  7686. +
  7687. + if (meta_sk->sk_state == TCP_CLOSE)
  7688. + inet_csk_destroy_sock(meta_sk);
  7689. + /* Otherwise, socket is reprieved until protocol close. */
  7690. +
  7691. +out:
  7692. + bh_unlock_sock(meta_sk);
  7693. + local_bh_enable();
  7694. + mutex_unlock(&mpcb->mpcb_mutex);
  7695. + sock_put(meta_sk); /* Taken by sock_hold */
  7696. +}
  7697. +
  7698. +void mptcp_disconnect(struct sock *sk)
  7699. +{
  7700. + struct sock *subsk, *tmpsk;
  7701. + struct tcp_sock *tp = tcp_sk(sk);
  7702. +
  7703. + __skb_queue_purge(&tp->mpcb->reinject_queue);
  7704. +
  7705. + if (tp->inside_tk_table) {
  7706. + mptcp_hash_remove_bh(tp);
  7707. + reqsk_queue_destroy(&inet_csk(tp->meta_sk)->icsk_accept_queue);
  7708. + }
  7709. +
  7710. + local_bh_disable();
  7711. + mptcp_for_each_sk_safe(tp->mpcb, subsk, tmpsk) {
  7712. + /* The socket will get removed from the subsocket-list
  7713. + * and made non-mptcp by setting mpc to 0.
  7714. + *
  7715. + * This is necessary, because tcp_disconnect assumes
  7716. + * that the connection is completely dead afterwards.
  7717. + * Thus we need to call mptcp_del_sock, and because of
  7718. + * that call we have to make it non-mptcp.
  7719. + *
  7720. + * We have to lock the socket, because we set mpc to 0.
  7721. + * Otherwise an incoming packet could take the subsocket's
  7722. + * lock and proceed into the receive-path.
  7723. + * This would be a race.
  7724. + */
  7725. +
  7726. + bh_lock_sock(subsk);
  7727. + mptcp_del_sock(subsk);
  7728. + reset_mpc(tcp_sk(subsk));
  7729. + mptcp_sub_force_close(subsk);
  7730. + bh_unlock_sock(subsk);
  7731. + }
  7732. + local_bh_enable();
  7733. +
  7734. + tp->was_meta_sk = 1;
  7735. + reset_mpc(tp);
  7736. +}
  7737. +
  7738. +
  7739. +/* Returns 1 if we should enable MPTCP for that socket. */
  7740. +int mptcp_doit(struct sock *sk)
  7741. +{
  7742. + /* Do not allow MPTCP enabling if the MPTCP initialization failed */
  7743. + if (mptcp_init_failed)
  7744. + return 0;
  7745. +
  7746. + if (sysctl_mptcp_enabled == MPTCP_APP && !tcp_sk(sk)->mptcp_enabled)
  7747. + return 0;
  7748. +
  7749. + /* Socket may already be established (e.g., called from tcp_recvmsg) */
  7750. + if (tcp_sk(sk)->mpc || tcp_sk(sk)->request_mptcp)
  7751. + return 1;
  7752. +
  7753. + /* Don't do mptcp over loopback */
  7754. + if (sk->sk_family == AF_INET &&
  7755. + (ipv4_is_loopback(inet_sk(sk)->inet_daddr) ||
  7756. + ipv4_is_loopback(inet_sk(sk)->inet_saddr)))
  7757. + return 0;
  7758. +#if IS_ENABLED(CONFIG_IPV6)
  7759. + if (sk->sk_family == AF_INET6 &&
  7760. + (ipv6_addr_loopback(&sk->sk_v6_daddr) ||
  7761. + ipv6_addr_loopback(&inet6_sk(sk)->saddr)))
  7762. + return 0;
  7763. +#endif
  7764. + if (mptcp_v6_is_v4_mapped(sk) &&
  7765. + ipv4_is_loopback(inet_sk(sk)->inet_saddr))
  7766. + return 0;
  7767. +
  7768. +#ifdef CONFIG_TCP_MD5SIG
  7769. + /* If TCP_MD5SIG is enabled, do not do MPTCP - there is no Option-Space */
  7770. + if (tcp_sk(sk)->af_specific->md5_lookup(sk, sk))
  7771. + return 0;
  7772. +#endif
  7773. +
  7774. + return 1;
  7775. +}
  7776. +
  7777. +int mptcp_create_master_sk(struct sock *meta_sk, __u64 remote_key, u32 window)
  7778. +{
  7779. + struct tcp_sock *master_tp;
  7780. + struct sock *master_sk;
  7781. +
  7782. + if (mptcp_alloc_mpcb(meta_sk, remote_key, window))
  7783. + goto err_alloc_mpcb;
  7784. +
  7785. + master_sk = tcp_sk(meta_sk)->mpcb->master_sk;
  7786. + master_tp = tcp_sk(master_sk);
  7787. +
  7788. + if (mptcp_add_sock(meta_sk, master_sk, 0, 0, GFP_ATOMIC))
  7789. + goto err_add_sock;
  7790. +
  7791. + if (__inet_inherit_port(meta_sk, master_sk) < 0)
  7792. + goto err_add_sock;
  7793. +
  7794. + meta_sk->sk_prot->unhash(meta_sk);
  7795. +
  7796. + if (master_sk->sk_family == AF_INET || mptcp_v6_is_v4_mapped(master_sk))
  7797. + __inet_hash_nolisten(master_sk, NULL);
  7798. +#if IS_ENABLED(CONFIG_IPV6)
  7799. + else
  7800. + __inet6_hash(master_sk, NULL);
  7801. +#endif
  7802. +
  7803. + master_tp->mptcp->init_rcv_wnd = master_tp->rcv_wnd;
  7804. +
  7805. + return 0;
  7806. +
  7807. +err_add_sock:
  7808. + mptcp_fallback_meta_sk(meta_sk);
  7809. +
  7810. + inet_csk_prepare_forced_close(master_sk);
  7811. + tcp_done(master_sk);
  7812. + inet_csk_prepare_forced_close(meta_sk);
  7813. + tcp_done(meta_sk);
  7814. +
  7815. +err_alloc_mpcb:
  7816. + return -ENOBUFS;
  7817. +}
  7818. +
  7819. +int mptcp_check_req_master(struct sock *sk, struct sock *child,
  7820. + struct request_sock *req,
  7821. + struct request_sock **prev,
  7822. + struct mptcp_options_received *mopt)
  7823. +{
  7824. + struct tcp_sock *child_tp = tcp_sk(child);
  7825. + struct sock *meta_sk = child;
  7826. + struct mptcp_cb *mpcb;
  7827. + struct mptcp_request_sock *mtreq;
  7828. +
  7829. + if (!tcp_rsk(req)->saw_mpc)
  7830. + return 1;
  7831. +
  7832. + /* Just set these values to pass them to mptcp_alloc_mpcb */
  7833. + mtreq = mptcp_rsk(req);
  7834. + child_tp->mptcp_loc_key = mtreq->mptcp_loc_key;
  7835. + child_tp->mptcp_loc_token = mtreq->mptcp_loc_token;
  7836. +
  7837. + if (mptcp_create_master_sk(meta_sk, mtreq->mptcp_rem_key,
  7838. + child_tp->snd_wnd))
  7839. + return -ENOBUFS;
  7840. +
  7841. + child = tcp_sk(child)->mpcb->master_sk;
  7842. + child_tp = tcp_sk(child);
  7843. + mpcb = child_tp->mpcb;
  7844. +
  7845. + child_tp->mptcp->snt_isn = tcp_rsk(req)->snt_isn;
  7846. + child_tp->mptcp->rcv_isn = tcp_rsk(req)->rcv_isn;
  7847. +
  7848. + mpcb->dss_csum = mtreq->dss_csum;
  7849. + mpcb->server_side = 1;
  7850. +
  7851. + /* Will be moved to ESTABLISHED by tcp_rcv_state_process() */
  7852. + mptcp_update_metasocket(child, meta_sk);
  7853. +
  7854. + /* This additionally needs to be done here, because when accepting
  7855. + * a new connection we go through __reqsk_free and not reqsk_free.
  7856. + */
  7857. + mptcp_reqsk_remove_tk(req);
  7858. +
  7859. + /* Hold when creating the meta-sk in tcp_vX_syn_recv_sock. */
  7860. + sock_put(meta_sk);
  7861. +
  7862. + inet_csk_reqsk_queue_unlink(sk, req, prev);
  7863. + inet_csk_reqsk_queue_removed(sk, req);
  7864. + inet_csk_reqsk_queue_add(sk, req, meta_sk);
  7865. +
  7866. + return 0;
  7867. +}
  7868. +
  7869. +struct sock *mptcp_check_req_child(struct sock *meta_sk, struct sock *child,
  7870. + struct request_sock *req,
  7871. + struct request_sock **prev,
  7872. + struct mptcp_options_received *mopt)
  7873. +{
  7874. + struct tcp_sock *child_tp = tcp_sk(child);
  7875. + struct mptcp_request_sock *mtreq = mptcp_rsk(req);
  7876. + struct mptcp_cb *mpcb = mtreq->mpcb;
  7877. + u8 hash_mac_check[20];
  7878. +
  7879. + child_tp->inside_tk_table = 0;
  7880. +
  7881. + if (!mopt->join_ack)
  7882. + goto teardown;
  7883. +
  7884. + mptcp_hmac_sha1((u8 *)&mpcb->mptcp_rem_key,
  7885. + (u8 *)&mpcb->mptcp_loc_key,
  7886. + (u8 *)&mtreq->mptcp_rem_nonce,
  7887. + (u8 *)&mtreq->mptcp_loc_nonce,
  7888. + (u32 *)hash_mac_check);
  7889. +
  7890. + if (memcmp(hash_mac_check, (char *)&mopt->mptcp_recv_mac, 20))
  7891. + goto teardown;
  7892. +
  7893. + /* Point it to the same struct socket and wq as the meta_sk */
  7894. + sk_set_socket(child, meta_sk->sk_socket);
  7895. + child->sk_wq = meta_sk->sk_wq;
  7896. +
  7897. + if (mptcp_add_sock(meta_sk, child, mtreq->loc_id, mtreq->rem_id, GFP_ATOMIC)) {
  7898. + reset_mpc(child_tp); /* Has been inherited, but now
  7899. + * child_tp->mptcp is NULL
  7900. + */
  7901. + /* TODO when we support acking the third ack for new subflows,
  7902. + * we should silently discard this third ack, by returning NULL.
  7903. + *
  7904. + * Maybe, at the retransmission we will have enough memory to
  7905. + * fully add the socket to the meta-sk.
  7906. + */
  7907. + goto teardown;
  7908. + }
  7909. +
  7910. + /* The child is a clone of the meta socket; we must now reset
  7911. + * some of the fields.
  7912. + */
  7913. + child_tp->mptcp->rcv_low_prio = mtreq->low_prio;
  7914. +
  7915. + /* We should allow proper increase of the snd/rcv-buffers. Thus, we
  7916. + * use the original values instead of the bloated-up ones from the
  7917. + * clone.
  7918. + */
  7919. + child->sk_sndbuf = mpcb->orig_sk_sndbuf;
  7920. + child->sk_rcvbuf = mpcb->orig_sk_rcvbuf;
  7921. +
  7922. + child_tp->mptcp->slave_sk = 1;
  7923. + child_tp->mptcp->snt_isn = tcp_rsk(req)->snt_isn;
  7924. + child_tp->mptcp->rcv_isn = tcp_rsk(req)->rcv_isn;
  7925. + child_tp->mptcp->init_rcv_wnd = req->rcv_wnd;
  7926. +
  7927. + child_tp->tsq_flags = 0;
  7928. +
  7929. + /* Subflows do not use the accept queue, as they
  7930. + * are attached immediately to the mpcb.
  7931. + */
  7932. + inet_csk_reqsk_queue_drop(meta_sk, req, prev);
  7933. + return child;
  7934. +
  7935. +teardown:
  7936. + /* Drop this request - sock creation failed. */
  7937. + inet_csk_reqsk_queue_drop(meta_sk, req, prev);
  7938. + inet_csk_prepare_forced_close(child);
  7939. + tcp_done(child);
  7940. + return meta_sk;
  7941. +}
  7942. +
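+/* Attach an mptcp_tw entry to the timewait-sock and link it into the mpcb's
+ * tw_list, so that the time-wait socket can still ack a DATA_FIN (see
+ * mptcp_update_tw_socks()).
+ */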
  7943. +int mptcp_time_wait(struct sock *sk, struct tcp_timewait_sock *tw)
  7944. +{
  7945. + struct mptcp_tw *mptw;
  7946. + struct tcp_sock *tp = tcp_sk(sk);
  7947. + struct mptcp_cb *mpcb = tp->mpcb;
  7948. +
  7949. + /* Alloc MPTCP-tw-sock */
  7950. + mptw = kmem_cache_alloc(mptcp_tw_cache, GFP_ATOMIC);
  7951. + if (!mptw)
  7952. + return -ENOBUFS;
  7953. +
  7954. + atomic_inc(&mpcb->mpcb_refcnt);
  7955. +
  7956. + tw->mptcp_tw = mptw;
  7957. + mptw->loc_key = mpcb->mptcp_loc_key;
  7958. + mptw->meta_tw = mpcb->in_time_wait;
  7959. + if (mptw->meta_tw) {
  7960. + mptw->rcv_nxt = mptcp_get_rcv_nxt_64(mptcp_meta_tp(tp));
  7961. + if (mpcb->mptw_state != TCP_TIME_WAIT)
  7962. + mptw->rcv_nxt++;
  7963. + }
  7964. + rcu_assign_pointer(mptw->mpcb, mpcb);
  7965. +
  7966. + spin_lock(&mpcb->tw_lock);
  7967. + list_add_rcu(&mptw->list, &tp->mpcb->tw_list);
  7968. + mptw->in_list = 1;
  7969. + spin_unlock(&mpcb->tw_lock);
  7970. +
  7971. + return 0;
  7972. +}
  7973. +
  7974. +void mptcp_twsk_destructor(struct tcp_timewait_sock *tw)
  7975. +{
  7976. + struct mptcp_cb *mpcb;
  7977. +
  7978. + rcu_read_lock();
  7979. + mpcb = rcu_dereference(tw->mptcp_tw->mpcb);
  7980. +
  7981. + /* If we are still holding a ref to the mpcb, we have to remove ourselves
  7982. + * from the list and drop the ref properly.
  7983. + */
  7984. + if (mpcb && atomic_inc_not_zero(&mpcb->mpcb_refcnt)) {
  7985. + spin_lock(&mpcb->tw_lock);
  7986. + if (tw->mptcp_tw->in_list) {
  7987. + list_del_rcu(&tw->mptcp_tw->list);
  7988. + tw->mptcp_tw->in_list = 0;
  7989. + }
  7990. + spin_unlock(&mpcb->tw_lock);
  7991. +
  7992. + /* Twice, because we increased it above */
  7993. + mptcp_mpcb_put(mpcb);
  7994. + mptcp_mpcb_put(mpcb);
  7995. + }
  7996. +
  7997. + rcu_read_unlock();
  7998. +
  7999. + kmem_cache_free(mptcp_tw_cache, tw->mptcp_tw);
  8000. +}
  8001. +
  8002. +/* Updates the rcv_nxt of the time-wait-socks and allows them to ack a
  8003. + * data-fin.
  8004. + */
  8005. +void mptcp_update_tw_socks(const struct tcp_sock *tp, int state)
  8006. +{
  8007. + struct mptcp_tw *mptw;
  8008. +
  8009. + /* Used for sockets that go into tw after the meta
  8010. + * (see mptcp_time_wait())
  8011. + */
  8012. + tp->mpcb->in_time_wait = 1;
  8013. + tp->mpcb->mptw_state = state;
  8014. +
  8015. + /* Update the time-wait-sock's information */
  8016. + rcu_read_lock_bh();
  8017. + list_for_each_entry_rcu(mptw, &tp->mpcb->tw_list, list) {
  8018. + mptw->meta_tw = 1;
  8019. + mptw->rcv_nxt = mptcp_get_rcv_nxt_64(tp);
  8020. +
  8021. + /* We want to ack a DATA_FIN, but are still in FIN_WAIT_2 -
  8022. + * pretend as if the DATA_FIN has already reached us, so that
  8023. + * the checks in tcp_timewait_state_process will succeed when the
  8024. + * DATA_FIN comes in.
  8025. + */
  8026. + if (state != TCP_TIME_WAIT)
  8027. + mptw->rcv_nxt++;
  8028. + }
  8029. + rcu_read_unlock_bh();
  8030. +}
  8031. +
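+/* Defer subflow work to the meta-socket: queue the subflow on the mpcb's
+ * callback_list and set MPTCP_SUB_DEFERRED on the meta-sk, so that
+ * mptcp_tsq_sub_deferred() runs the subflow's release_cb later on.
+ */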
  8032. +void mptcp_tsq_flags(struct sock *sk)
  8033. +{
  8034. + struct tcp_sock *tp = tcp_sk(sk);
  8035. + struct sock *meta_sk = mptcp_meta_sk(sk);
  8036. +
  8037. + /* It will be handled as a regular deferred-call */
  8038. + if (is_meta_sk(sk))
  8039. + return;
  8040. +
  8041. + if (list_empty(&tp->mptcp->cb_list)) {
  8042. + list_add(&tp->mptcp->cb_list, &tp->mpcb->callback_list);
  8043. + /* We need to hold it here, as the sock_hold is not ensured
  8044. + * by release_sock like it is in regular TCP.
  8045. + *
  8046. + * The subsocket may get inet_csk_destroy'd while it is inside
  8047. + * the callback_list.
  8048. + */
  8049. + sock_hold(sk);
  8050. + }
  8051. +
  8052. + if (!test_and_set_bit(MPTCP_SUB_DEFERRED, &tcp_sk(meta_sk)->tsq_flags))
  8053. + sock_hold(meta_sk);
  8054. +}
  8055. +
  8056. +void mptcp_tsq_sub_deferred(struct sock *meta_sk)
  8057. +{
  8058. + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
  8059. + struct mptcp_tcp_sock *mptcp, *tmp;
  8060. +
  8061. + BUG_ON(!is_meta_sk(meta_sk) && !meta_tp->was_meta_sk);
  8062. +
  8063. + __sock_put(meta_sk);
  8064. + list_for_each_entry_safe(mptcp, tmp, &meta_tp->mpcb->callback_list, cb_list) {
  8065. + struct tcp_sock *tp = mptcp->tp;
  8066. + struct sock *sk = (struct sock *)tp;
  8067. +
  8068. + list_del_init(&mptcp->cb_list);
  8069. + sk->sk_prot->release_cb(sk);
  8070. + /* Final sock_put (cf. mptcp_tsq_flags()) */
  8071. + sock_put(sk);
  8072. + }
  8073. +}
  8074. +
  8075. +struct workqueue_struct *mptcp_wq;
  8076. +EXPORT_SYMBOL(mptcp_wq);
  8077. +
  8078. +/* Output /proc/net/mptcp */
  8079. +static int mptcp_pm_seq_show(struct seq_file *seq, void *v)
  8080. +{
  8081. + struct tcp_sock *meta_tp;
  8082. + struct net *net = seq->private;
  8083. + int i, n = 0;
  8084. +
  8085. + seq_printf(seq, " sl loc_tok rem_tok v6 "
  8086. + "local_address "
  8087. + "remote_address "
  8088. + "st ns tx_queue rx_queue inode");
  8089. + seq_putc(seq, '\n');
  8090. +
  8091. + for (i = 0; i < MPTCP_HASH_SIZE; i++) {
  8092. + struct hlist_nulls_node *node;
  8093. + rcu_read_lock_bh();
  8094. + hlist_nulls_for_each_entry_rcu(meta_tp, node,
  8095. + &tk_hashtable[i], tk_table) {
  8096. + struct mptcp_cb *mpcb = meta_tp->mpcb;
  8097. + struct sock *meta_sk = (struct sock *)meta_tp;
  8098. + struct inet_sock *isk = inet_sk(meta_sk);
  8099. +
  8100. + if (!meta_tp->mpc || !net_eq(net, sock_net(meta_sk)))
  8101. + continue;
  8102. +
  8103. + seq_printf(seq, "%4d: %04X %04X ", n++,
  8104. + mpcb->mptcp_loc_token,
  8105. + mpcb->mptcp_rem_token);
  8106. + if (meta_sk->sk_family == AF_INET ||
  8107. + mptcp_v6_is_v4_mapped(meta_sk)) {
  8108. + seq_printf(seq, " 0 %08X:%04X %08X:%04X ",
  8109. + isk->inet_saddr,
  8110. + ntohs(isk->inet_sport),
  8111. + isk->inet_daddr,
  8112. + ntohs(isk->inet_dport));
  8113. +#if IS_ENABLED(CONFIG_IPV6)
  8114. + } else if (meta_sk->sk_family == AF_INET6) {
  8115. + struct in6_addr *src = &isk->pinet6->saddr;
  8116. + struct in6_addr *dst = &meta_sk->sk_v6_daddr;
  8117. + seq_printf(seq, " 1 %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X",
  8118. + src->s6_addr32[0], src->s6_addr32[1],
  8119. + src->s6_addr32[2], src->s6_addr32[3],
  8120. + ntohs(isk->inet_sport),
  8121. + dst->s6_addr32[0], dst->s6_addr32[1],
  8122. + dst->s6_addr32[2], dst->s6_addr32[3],
  8123. + ntohs(isk->inet_dport));
  8124. +#endif
  8125. + }
  8126. + seq_printf(seq, " %02X %02X %08X:%08X %lu",
  8127. + meta_sk->sk_state, mpcb->cnt_subflows,
  8128. + meta_tp->write_seq - meta_tp->snd_una,
  8129. + max_t(int, meta_tp->rcv_nxt -
  8130. + meta_tp->copied_seq, 0),
  8131. + sock_i_ino(meta_sk));
  8132. + seq_putc(seq, '\n');
  8133. + }
  8134. + rcu_read_unlock_bh();
  8135. + }
  8136. +
  8137. + return 0;
  8138. +}
  8139. +
  8140. +static int mptcp_pm_seq_open(struct inode *inode, struct file *file)
  8141. +{
  8142. + return single_open_net(inode, file, mptcp_pm_seq_show);
  8143. +}
  8144. +
  8145. +static const struct file_operations mptcp_pm_seq_fops = {
  8146. + .owner = THIS_MODULE,
  8147. + .open = mptcp_pm_seq_open,
  8148. + .read = seq_read,
  8149. + .llseek = seq_lseek,
  8150. + .release = single_release_net,
  8151. +};
  8152. +
  8153. +static int mptcp_pm_init_net(struct net *net)
  8154. +{
  8155. + if (!proc_create("mptcp", S_IRUGO, net->proc_net, &mptcp_pm_seq_fops))
  8156. + return -ENOMEM;
  8157. +
  8158. + return 0;
  8159. +}
  8160. +
  8161. +static void mptcp_pm_exit_net(struct net *net)
  8162. +{
  8163. + remove_proc_entry("mptcp", net->proc_net);
  8164. +}
  8165. +
  8166. +static struct pernet_operations mptcp_pm_proc_ops = {
  8167. + .init = mptcp_pm_init_net,
  8168. + .exit = mptcp_pm_exit_net,
  8169. +};
  8170. +
  8171. +/* General initialization of mptcp */
  8172. +void __init mptcp_init(void)
  8173. +{
  8174. + int i;
  8175. + struct ctl_table_header *mptcp_sysctl;
  8176. +
  8177. + mptcp_sock_cache = kmem_cache_create("mptcp_sock",
  8178. + sizeof(struct mptcp_tcp_sock),
  8179. + 0, SLAB_HWCACHE_ALIGN,
  8180. + NULL);
  8181. + if (!mptcp_sock_cache)
  8182. + goto mptcp_sock_cache_failed;
  8183. +
  8184. + mptcp_cb_cache = kmem_cache_create("mptcp_cb", sizeof(struct mptcp_cb),
  8185. + 0, SLAB_DESTROY_BY_RCU|SLAB_HWCACHE_ALIGN,
  8186. + NULL);
  8187. + if (!mptcp_cb_cache)
  8188. + goto mptcp_cb_cache_failed;
  8189. +
  8190. + mptcp_tw_cache = kmem_cache_create("mptcp_tw", sizeof(struct mptcp_tw),
  8191. + 0, SLAB_DESTROY_BY_RCU|SLAB_HWCACHE_ALIGN,
  8192. + NULL);
  8193. + if (!mptcp_tw_cache)
  8194. + goto mptcp_tw_cache_failed;
  8195. +
  8196. + get_random_bytes(mptcp_secret, sizeof(mptcp_secret));
  8197. +
  8198. + mptcp_wq = alloc_workqueue("mptcp_wq", WQ_UNBOUND | WQ_MEM_RECLAIM, 8);
  8199. + if (!mptcp_wq)
  8200. + goto alloc_workqueue_failed;
  8201. +
  8202. + for (i = 0; i < MPTCP_HASH_SIZE; i++) {
  8203. + INIT_HLIST_NULLS_HEAD(&tk_hashtable[i], i);
  8204. + INIT_LIST_HEAD(&mptcp_reqsk_htb[i]);
  8205. + INIT_HLIST_NULLS_HEAD(&mptcp_reqsk_tk_htb[i], i);
  8206. + }
  8207. +
  8208. + spin_lock_init(&mptcp_reqsk_hlock);
  8209. + spin_lock_init(&mptcp_tk_hashlock);
  8210. +
  8211. + if (register_pernet_subsys(&mptcp_pm_proc_ops))
  8212. + goto pernet_failed;
  8213. +
  8214. +#if IS_ENABLED(CONFIG_IPV6)
  8215. + if (mptcp_pm_v6_init())
  8216. + goto mptcp_pm_v6_failed;
  8217. +#endif
  8218. + if (mptcp_pm_v4_init())
  8219. + goto mptcp_pm_v4_failed;
  8220. +
  8221. + mptcp_sysctl = register_net_sysctl(&init_net, "net/mptcp", mptcp_table);
  8222. + if (!mptcp_sysctl)
  8223. + goto register_sysctl_failed;
  8224. +
  8225. + if (mptcp_register_path_manager(&mptcp_pm_default))
  8226. + goto register_pm_failed;
  8227. +
  8228. + pr_info("MPTCP: Stable release v0.89.0-rc");
  8229. +
  8230. + mptcp_init_failed = false;
  8231. +
  8232. + return;
  8233. +
  8234. +register_pm_failed:
  8235. + unregister_net_sysctl_table(mptcp_sysctl);
  8236. +register_sysctl_failed:
  8237. + mptcp_pm_v4_undo();
  8238. +mptcp_pm_v4_failed:
  8239. +#if IS_ENABLED(CONFIG_IPV6)
  8240. + mptcp_pm_v6_undo();
  8241. +mptcp_pm_v6_failed:
  8242. +#endif
  8243. + unregister_pernet_subsys(&mptcp_pm_proc_ops);
  8244. +pernet_failed:
  8245. + destroy_workqueue(mptcp_wq);
  8246. +alloc_workqueue_failed:
  8247. + kmem_cache_destroy(mptcp_tw_cache);
  8248. +mptcp_tw_cache_failed:
  8249. + kmem_cache_destroy(mptcp_cb_cache);
  8250. +mptcp_cb_cache_failed:
  8251. + kmem_cache_destroy(mptcp_sock_cache);
  8252. +mptcp_sock_cache_failed:
  8253. + mptcp_init_failed = true;
  8254. +}
  8255. diff -Nur linux-3.14.45.orig/net/mptcp/mptcp_fullmesh.c linux-3.14.45/net/mptcp/mptcp_fullmesh.c
  8256. --- linux-3.14.45.orig/net/mptcp/mptcp_fullmesh.c 1970-01-01 01:00:00.000000000 +0100
  8257. +++ linux-3.14.45/net/mptcp/mptcp_fullmesh.c 2015-06-24 14:15:48.891862483 +0200
  8258. @@ -0,0 +1,1313 @@
  8259. +#include <linux/module.h>
  8260. +
  8261. +#include <net/mptcp.h>
  8262. +#include <net/mptcp_v4.h>
  8263. +
  8264. +#if IS_ENABLED(CONFIG_IPV6)
  8265. +#include <net/mptcp_v6.h>
  8266. +#include <net/addrconf.h>
  8267. +#endif
  8268. +
  8269. +enum {
  8270. + MPTCP_EVENT_ADD = 1,
  8271. + MPTCP_EVENT_DEL,
  8272. + MPTCP_EVENT_MOD,
  8273. +};
  8274. +
  8275. +struct mptcp_loc_addr {
  8276. + struct mptcp_loc4 locaddr4[MPTCP_MAX_ADDR];
  8277. + u8 loc4_bits;
  8278. + u8 next_v4_index;
  8279. +
  8280. + struct mptcp_loc6 locaddr6[MPTCP_MAX_ADDR];
  8281. + u8 loc6_bits;
  8282. + u8 next_v6_index;
  8283. +};
  8284. +
  8285. +struct mptcp_addr_event {
  8286. + struct list_head list;
  8287. + unsigned short family;
  8288. + u8 code:7,
  8289. + low_prio:1;
  8290. + union inet_addr addr;
  8291. +};
  8292. +
  8293. +struct fullmesh_priv {
  8294. + /* Worker struct for subflow establishment */
  8295. + struct work_struct subflow_work;
  8296. + /* Delayed worker, when the routing-tables are not yet ready. */
  8297. + struct delayed_work subflow_retry_work;
  8298. +
  8299. + struct mptcp_cb *mpcb;
  8300. +
  8301. + u16 remove_addrs; /* Addresses to remove */
  8302. + u8 announced_addrs_v4; /* IPv4 Addresses we did announce */
  8303. + u8 announced_addrs_v6; /* IPv6 Addresses we did announce */
  8304. +
  8305. + u8 add_addr; /* Are we sending an add_addr? */
  8306. +};
  8307. +
  8308. +struct mptcp_fm_ns {
  8309. + struct mptcp_loc_addr __rcu *local;
  8310. + spinlock_t local_lock; /* Protecting the above pointer */
  8311. + struct list_head events;
  8312. + struct delayed_work address_worker;
  8313. +
  8314. + struct net *net;
  8315. +};
  8316. +
  8317. +static struct mptcp_pm_ops full_mesh __read_mostly;
  8318. +
  8319. +static struct mptcp_fm_ns *fm_get_ns(struct net *net)
  8320. +{
  8321. + return (struct mptcp_fm_ns *)net->mptcp.path_managers[MPTCP_PM_FULLMESH];
  8322. +}
  8323. +
  8324. +static void full_mesh_create_subflows(struct sock *meta_sk);
  8325. +
  8326. +static void retry_subflow_worker(struct work_struct *work)
  8327. +{
  8328. + struct delayed_work *delayed_work = container_of(work,
  8329. + struct delayed_work,
  8330. + work);
  8331. + struct fullmesh_priv *pm_priv = container_of(delayed_work,
  8332. + struct fullmesh_priv,
  8333. + subflow_retry_work);
  8334. + struct mptcp_cb *mpcb = pm_priv->mpcb;
  8335. + struct sock *meta_sk = mpcb->meta_sk;
  8336. + struct mptcp_loc_addr *mptcp_local;
  8337. + struct mptcp_fm_ns *fm_ns = fm_get_ns(sock_net(meta_sk));
  8338. + int iter = 0, i;
  8339. +
  8340. + /* We need a local (stable) copy of the address-list. Really, it is not
  8341. + * such a big deal, if the address-list is not 100% up-to-date.
  8342. + */
  8343. + rcu_read_lock_bh();
  8344. + mptcp_local = rcu_dereference_bh(fm_ns->local);
  8345. + mptcp_local = kmemdup(mptcp_local, sizeof(*mptcp_local), GFP_ATOMIC);
  8346. + rcu_read_unlock_bh();
  8347. +
  8348. + if (!mptcp_local)
  8349. + return;
  8350. +
  8351. +next_subflow:
  8352. + if (iter) {
  8353. + release_sock(meta_sk);
  8354. + mutex_unlock(&mpcb->mpcb_mutex);
  8355. +
  8356. + yield();
  8357. + }
  8358. + mutex_lock(&mpcb->mpcb_mutex);
  8359. + lock_sock_nested(meta_sk, SINGLE_DEPTH_NESTING);
  8360. +
  8361. + iter++;
  8362. +
  8363. + if (sock_flag(meta_sk, SOCK_DEAD))
  8364. + goto exit;
  8365. +
  8366. + mptcp_for_each_bit_set(mpcb->rem4_bits, i) {
  8367. + struct mptcp_rem4 *rem = &mpcb->remaddr4[i];
  8368. + /* Do we need to retry establishing a subflow? */
  8369. + if (rem->retry_bitfield) {
  8370. + int i = mptcp_find_free_index(~rem->retry_bitfield);
  8371. +
  8372. + rem->bitfield |= (1 << i);
  8373. + rem->retry_bitfield &= ~(1 << i);
  8374. +
  8375. + mptcp_init4_subsockets(meta_sk, &mptcp_local->locaddr4[i], rem);
  8376. + goto next_subflow;
  8377. + }
  8378. + }
  8379. +
  8380. +#if IS_ENABLED(CONFIG_IPV6)
  8381. + mptcp_for_each_bit_set(mpcb->rem6_bits, i) {
  8382. + struct mptcp_rem6 *rem = &mpcb->remaddr6[i];
  8383. +
  8384. + /* Do we need to retry establishing a subflow? */
  8385. + if (rem->retry_bitfield) {
  8386. + int i = mptcp_find_free_index(~rem->retry_bitfield);
  8387. +
  8388. + rem->bitfield |= (1 << i);
  8389. + rem->retry_bitfield &= ~(1 << i);
  8390. +
  8391. + mptcp_init6_subsockets(meta_sk, &mptcp_local->locaddr6[i], rem);
  8392. + goto next_subflow;
  8393. + }
  8394. + }
  8395. +#endif
  8396. +
  8397. +exit:
  8398. + kfree(mptcp_local);
  8399. + release_sock(meta_sk);
  8400. + mutex_unlock(&mpcb->mpcb_mutex);
  8401. + sock_put(meta_sk);
  8402. +}
  8403. +
  8404. +/**
  8405. + * Create all new subflows, by calling mptcp_initX_subsockets()
  8406. + *
  8407. + * This function uses a goto next_subflow to allow releasing the lock between
  8408. + * new subflows, giving other processes a chance to do some work on the
  8409. + * socket and potentially finish the communication.
  8410. + **/
  8411. +static void create_subflow_worker(struct work_struct *work)
  8412. +{
  8413. + struct fullmesh_priv *pm_priv = container_of(work,
  8414. + struct fullmesh_priv,
  8415. + subflow_work);
  8416. + struct mptcp_cb *mpcb = pm_priv->mpcb;
  8417. + struct sock *meta_sk = mpcb->meta_sk;
  8418. + struct mptcp_loc_addr *mptcp_local;
  8419. + struct mptcp_fm_ns *fm_ns = fm_get_ns(sock_net(meta_sk));
  8420. + int iter = 0, retry = 0;
  8421. + int i;
  8422. +
  8423. + /* We need a local (stable) copy of the address-list. Really, it is not
  8424. + * such a big deal, if the address-list is not 100% up-to-date.
  8425. + */
  8426. + rcu_read_lock_bh();
  8427. + mptcp_local = rcu_dereference_bh(fm_ns->local);
  8428. + mptcp_local = kmemdup(mptcp_local, sizeof(*mptcp_local), GFP_ATOMIC);
  8429. + rcu_read_unlock_bh();
  8430. +
  8431. + if (!mptcp_local)
  8432. + return;
  8433. +
  8434. +next_subflow:
  8435. + if (iter) {
  8436. + release_sock(meta_sk);
  8437. + mutex_unlock(&mpcb->mpcb_mutex);
  8438. +
  8439. + yield();
  8440. + }
  8441. + mutex_lock(&mpcb->mpcb_mutex);
  8442. + lock_sock_nested(meta_sk, SINGLE_DEPTH_NESTING);
  8443. +
  8444. + iter++;
  8445. +
  8446. + if (sock_flag(meta_sk, SOCK_DEAD))
  8447. + goto exit;
  8448. +
  8449. + if (mpcb->master_sk &&
  8450. + !tcp_sk(mpcb->master_sk)->mptcp->fully_established)
  8451. + goto exit;
  8452. +
  8453. + mptcp_for_each_bit_set(mpcb->rem4_bits, i) {
  8454. + struct mptcp_rem4 *rem;
  8455. + u8 remaining_bits;
  8456. +
  8457. + rem = &mpcb->remaddr4[i];
  8458. + remaining_bits = ~(rem->bitfield) & mptcp_local->loc4_bits;
  8459. +
  8460. + /* Are there still combinations to handle? */
  8461. + if (remaining_bits) {
  8462. + int i = mptcp_find_free_index(~remaining_bits);
  8463. +
  8464. + rem->bitfield |= (1 << i);
  8465. +
  8466. + /* If a route is not yet available then retry once */
  8467. + if (mptcp_init4_subsockets(meta_sk, &mptcp_local->locaddr4[i],
  8468. + rem) == -ENETUNREACH)
  8469. + retry = rem->retry_bitfield |= (1 << i);
  8470. + goto next_subflow;
  8471. + }
  8472. + }
  8473. +
  8474. +#if IS_ENABLED(CONFIG_IPV6)
  8475. + mptcp_for_each_bit_set(mpcb->rem6_bits, i) {
  8476. + struct mptcp_rem6 *rem;
  8477. + u8 remaining_bits;
  8478. +
  8479. + rem = &mpcb->remaddr6[i];
  8480. + remaining_bits = ~(rem->bitfield) & mptcp_local->loc6_bits;
  8481. +
  8482. + /* Are there still combinations to handle? */
  8483. + if (remaining_bits) {
  8484. + int i = mptcp_find_free_index(~remaining_bits);
  8485. +
  8486. + rem->bitfield |= (1 << i);
  8487. +
  8488. + /* If a route is not yet available then retry once */
  8489. + if (mptcp_init6_subsockets(meta_sk, &mptcp_local->locaddr6[i],
  8490. + rem) == -ENETUNREACH)
  8491. + retry = rem->retry_bitfield |= (1 << i);
  8492. + goto next_subflow;
  8493. + }
  8494. + }
  8495. +#endif
  8496. +
  8497. + if (retry && !delayed_work_pending(&pm_priv->subflow_retry_work)) {
  8498. + sock_hold(meta_sk);
  8499. + queue_delayed_work(mptcp_wq, &pm_priv->subflow_retry_work,
  8500. + msecs_to_jiffies(MPTCP_SUBFLOW_RETRY_DELAY));
  8501. + }
  8502. +
  8503. +exit:
  8504. + kfree(mptcp_local);
  8505. + release_sock(meta_sk);
  8506. + mutex_unlock(&mpcb->mpcb_mutex);
  8507. + sock_put(meta_sk);
  8508. +}
  8509. +
  8510. +static void update_remove_addrs(u8 addr_id, struct sock *meta_sk,
  8511. + struct mptcp_loc_addr *mptcp_local)
  8512. +{
  8513. + struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
  8514. + struct fullmesh_priv *fmp = (struct fullmesh_priv *)&mpcb->mptcp_pm[0];
  8515. + struct sock *sk;
  8516. + int i;
  8517. +
  8518. + fmp->remove_addrs |= (1 << addr_id);
  8519. + /* v4 goes from 0 to MPTCP_MAX_ADDR, v6 beyond */
  8520. + if (addr_id < MPTCP_MAX_ADDR) {
  8521. + fmp->announced_addrs_v4 &= ~(1 << addr_id);
  8522. +
  8523. + mptcp_for_each_bit_set(mpcb->rem4_bits, i) {
  8524. + mpcb->remaddr4[i].bitfield &= mptcp_local->loc4_bits;
  8525. + mpcb->remaddr4[i].retry_bitfield &= mptcp_local->loc4_bits;
  8526. + }
  8527. + } else {
  8528. + fmp->announced_addrs_v6 &= ~(1 << (addr_id - MPTCP_MAX_ADDR));
  8529. +
  8530. + mptcp_for_each_bit_set(mpcb->rem6_bits, i) {
  8531. + mpcb->remaddr6[i].bitfield &= mptcp_local->loc6_bits;
  8532. + mpcb->remaddr6[i].retry_bitfield &= mptcp_local->loc6_bits;
  8533. + }
  8534. + }
  8535. +
  8536. + sk = mptcp_select_ack_sock(meta_sk, 0);
  8537. + if (sk)
  8538. + tcp_send_ack(sk);
  8539. +}
  8540. +
  8541. +static int mptcp_find_address(struct mptcp_loc_addr *mptcp_local,
  8542. + sa_family_t family, union inet_addr *addr)
  8543. +{
  8544. + int i;
  8545. + u8 loc_bits;
  8546. + bool found = false;
  8547. +
  8548. + if (family == AF_INET)
  8549. + loc_bits = mptcp_local->loc4_bits;
  8550. + else
  8551. + loc_bits = mptcp_local->loc6_bits;
  8552. +
  8553. + mptcp_for_each_bit_set(loc_bits, i) {
  8554. + if (family == AF_INET &&
  8555. + mptcp_local->locaddr4[i].addr.s_addr == addr->in.s_addr) {
  8556. + found = true;
  8557. + break;
  8558. + }
  8559. + if (family == AF_INET6 &&
  8560. + ipv6_addr_equal(&mptcp_local->locaddr6[i].addr,
  8561. + &addr->in6)) {
  8562. + found = true;
  8563. + break;
  8564. + }
  8565. + }
  8566. +
  8567. + if (!found)
  8568. + return -1;
  8569. +
  8570. + return i;
  8571. +}
  8572. +
  8573. +static void mptcp_address_worker(struct work_struct *work)
  8574. +{
  8575. + struct delayed_work *delayed_work = container_of(work,
  8576. + struct delayed_work,
  8577. + work);
  8578. + struct mptcp_fm_ns *fm_ns = container_of(delayed_work,
  8579. + struct mptcp_fm_ns,
  8580. + address_worker);
  8581. + struct net *net = fm_ns->net;
  8582. + struct mptcp_addr_event *event = NULL;
  8583. + struct mptcp_loc_addr *mptcp_local, *old;
  8584. + int i, id = -1; /* id is used in the socket-code on a delete-event */
  8585. + bool success; /* Used to indicate if we succeeded handling the event */
  8586. +
  8587. +next_event:
  8588. + success = false;
  8589. + kfree(event);
  8590. +
  8591. + /* First, let's dequeue an event from our event-list */
  8592. + rcu_read_lock_bh();
  8593. + spin_lock(&fm_ns->local_lock);
  8594. +
  8595. + event = list_first_entry_or_null(&fm_ns->events,
  8596. + struct mptcp_addr_event, list);
  8597. + if (!event) {
  8598. + spin_unlock(&fm_ns->local_lock);
  8599. + rcu_read_unlock_bh();
  8600. + return;
  8601. + }
  8602. +
  8603. + list_del(&event->list);
  8604. +
  8605. + mptcp_local = rcu_dereference_bh(fm_ns->local);
  8606. +
  8607. + if (event->code == MPTCP_EVENT_DEL) {
  8608. + id = mptcp_find_address(mptcp_local, event->family, &event->addr);
  8609. +
  8610. + /* Not in the list - so we don't care */
  8611. + if (id < 0)
  8612. + goto duno;
  8613. +
  8614. + old = mptcp_local;
  8615. + mptcp_local = kmemdup(mptcp_local, sizeof(*mptcp_local),
  8616. + GFP_ATOMIC);
  8617. + if (!mptcp_local)
  8618. + goto duno;
  8619. +
  8620. + if (event->family == AF_INET)
  8621. + mptcp_local->loc4_bits &= ~(1 << id);
  8622. + else
  8623. + mptcp_local->loc6_bits &= ~(1 << id);
  8624. +
  8625. + rcu_assign_pointer(fm_ns->local, mptcp_local);
  8626. + kfree(old);
  8627. + } else {
  8628. + int i = mptcp_find_address(mptcp_local, event->family, &event->addr);
  8629. + int j = i;
  8630. +
  8631. + if (j < 0) {
  8632. + /* Not in the list, so we have to find an empty slot */
  8633. + if (event->family == AF_INET)
  8634. + i = __mptcp_find_free_index(mptcp_local->loc4_bits, -1,
  8635. + mptcp_local->next_v4_index);
  8636. + if (event->family == AF_INET6)
  8637. + i = __mptcp_find_free_index(mptcp_local->loc6_bits, -1,
  8638. + mptcp_local->next_v6_index);
  8639. +
  8640. + if (i < 0) {
  8641. + mptcp_debug("%s no more space\n", __func__);
  8642. + goto duno;
  8643. + }
  8644. +
  8645. + /* It might have been a MOD-event. */
  8646. + event->code = MPTCP_EVENT_ADD;
  8647. + } else {
  8648. + /* Let's check if anything changes */
  8649. + if (event->family == AF_INET &&
  8650. + event->low_prio == mptcp_local->locaddr4[i].low_prio)
  8651. + goto duno;
  8652. +
  8653. + if (event->family == AF_INET6 &&
  8654. + event->low_prio == mptcp_local->locaddr6[i].low_prio)
  8655. + goto duno;
  8656. + }
  8657. +
  8658. + old = mptcp_local;
  8659. + mptcp_local = kmemdup(mptcp_local, sizeof(*mptcp_local),
  8660. + GFP_ATOMIC);
  8661. + if (!mptcp_local)
  8662. + goto duno;
  8663. +
  8664. + if (event->family == AF_INET) {
  8665. + mptcp_local->locaddr4[i].addr.s_addr = event->addr.in.s_addr;
  8666. + mptcp_local->locaddr4[i].loc4_id = i + 1;
  8667. + mptcp_local->locaddr4[i].low_prio = event->low_prio;
  8668. + } else {
  8669. + mptcp_local->locaddr6[i].addr = event->addr.in6;
  8670. + mptcp_local->locaddr6[i].loc6_id = i + MPTCP_MAX_ADDR;
  8671. + mptcp_local->locaddr6[i].low_prio = event->low_prio;
  8672. + }
  8673. +
  8674. + if (j < 0) {
  8675. + if (event->family == AF_INET) {
  8676. + mptcp_local->loc4_bits |= (1 << i);
  8677. + mptcp_local->next_v4_index = i + 1;
  8678. + } else {
  8679. + mptcp_local->loc6_bits |= (1 << i);
  8680. + mptcp_local->next_v6_index = i + 1;
  8681. + }
  8682. + }
  8683. +
  8684. + rcu_assign_pointer(fm_ns->local, mptcp_local);
  8685. + kfree(old);
  8686. + }
  8687. + success = true;
  8688. +
  8689. +duno:
  8690. + spin_unlock(&fm_ns->local_lock);
  8691. + rcu_read_unlock_bh();
  8692. +
  8693. + if (!success)
  8694. + goto next_event;
  8695. +
  8696. + /* Now we iterate over the MPTCP-sockets and apply the event. */
  8697. + for (i = 0; i < MPTCP_HASH_SIZE; i++) {
  8698. + struct hlist_nulls_node *node;
  8699. + struct tcp_sock *meta_tp;
  8700. +
  8701. + rcu_read_lock_bh();
  8702. + hlist_nulls_for_each_entry_rcu(meta_tp, node, &tk_hashtable[i],
  8703. + tk_table) {
  8704. + struct mptcp_cb *mpcb = meta_tp->mpcb;
  8705. + struct sock *meta_sk = (struct sock *)meta_tp, *sk;
  8706. + struct fullmesh_priv *fmp = (struct fullmesh_priv *)&mpcb->mptcp_pm[0];
  8707. +
  8708. + if (sock_net(meta_sk) != net)
  8709. + continue;
  8710. +
  8711. + if (unlikely(!atomic_inc_not_zero(&meta_sk->sk_refcnt)))
  8712. + continue;
  8713. +
  8714. + bh_lock_sock(meta_sk);
  8715. +
  8716. + if (!meta_tp->mpc || !is_meta_sk(meta_sk) ||
  8717. + mpcb->infinite_mapping_snd ||
  8718. + mpcb->infinite_mapping_rcv ||
  8719. + mpcb->send_infinite_mapping)
  8720. + goto next;
  8721. +
  8722. + /* It may be that the path manager has changed in the meantime */
  8723. + if (mpcb->pm_ops != &full_mesh)
  8724. + goto next;
  8725. +
  8726. + if (sock_owned_by_user(meta_sk)) {
  8727. + if (!test_and_set_bit(MPTCP_PATH_MANAGER,
  8728. + &meta_tp->tsq_flags))
  8729. + sock_hold(meta_sk);
  8730. +
  8731. + goto next;
  8732. + }
  8733. +
  8734. + if (event->code == MPTCP_EVENT_ADD) {
  8735. + if (event->family == AF_INET)
  8736. + fmp->add_addr++;
  8737. +#if IS_ENABLED(CONFIG_IPV6)
  8738. + if (event->family == AF_INET6)
  8739. + fmp->add_addr++;
  8740. +#endif
  8741. +
  8742. + sk = mptcp_select_ack_sock(meta_sk, 0);
  8743. + if (sk)
  8744. + tcp_send_ack(sk);
  8745. +
  8746. + full_mesh_create_subflows(meta_sk);
  8747. + }
  8748. +
  8749. + if (event->code == MPTCP_EVENT_DEL) {
  8750. + struct sock *sk, *tmpsk;
  8751. + struct mptcp_loc_addr *mptcp_local;
  8752. + bool found = false;
  8753. +
  8754. + mptcp_local = rcu_dereference_bh(fm_ns->local);
  8755. +
  8756. + /* Look for the socket and remove it */
  8757. + mptcp_for_each_sk_safe(mpcb, sk, tmpsk) {
  8758. + if ((event->family == AF_INET6 &&
  8759. + (sk->sk_family == AF_INET ||
  8760. + mptcp_v6_is_v4_mapped(sk))) ||
  8761. + (event->family == AF_INET &&
  8762. + (sk->sk_family == AF_INET6 &&
  8763. + !mptcp_v6_is_v4_mapped(sk))))
  8764. + continue;
  8765. +
  8766. + if (event->family == AF_INET &&
  8767. + (sk->sk_family == AF_INET ||
  8768. + mptcp_v6_is_v4_mapped(sk)) &&
  8769. + inet_sk(sk)->inet_saddr != event->addr.in.s_addr)
  8770. + continue;
  8771. +
  8772. + if (event->family == AF_INET6 &&
  8773. + sk->sk_family == AF_INET6 &&
  8774. + !ipv6_addr_equal(&inet6_sk(sk)->saddr, &event->addr.in6))
  8775. + continue;
  8776. +
  8777. + /* Reinject, so that pf = 1 and so we
  8778. + * won't select this one as the
  8779. + * ack-sock.
  8780. + */
  8781. + mptcp_reinject_data(sk, 0);
  8782. +
  8783. + /* The master subflow is special: it has
  8784. + * address-id 0
  8785. + */
  8786. + if (!tcp_sk(sk)->mptcp->loc_id)
  8787. + update_remove_addrs(0, meta_sk, mptcp_local);
  8788. + else if (tcp_sk(sk)->mptcp->loc_id != id)
  8789. + update_remove_addrs(tcp_sk(sk)->mptcp->loc_id, meta_sk, mptcp_local);
  8790. +
  8791. + mptcp_sub_force_close(sk);
  8792. + found = true;
  8793. + }
  8794. +
  8795. + if (!found)
  8796. + goto next;
  8797. +
  8798. + /* The id may have been found via the event,
  8799. + * by matching on a local address. It may not
  8800. + * have matched any of the above sockets,
  8801. + * because the client never created a subflow.
  8802. + * So, we finally have to remove it here.
  8803. + */
  8804. + if (id > 0)
  8805. + update_remove_addrs(id, meta_sk, mptcp_local);
  8806. + }
  8807. +
  8808. + if (event->code == MPTCP_EVENT_MOD) {
  8809. + struct sock *sk;
  8810. +
  8811. + mptcp_for_each_sk(mpcb, sk) {
  8812. + struct tcp_sock *tp = tcp_sk(sk);
  8813. + if (event->family == AF_INET &&
  8814. + (sk->sk_family == AF_INET ||
  8815. + mptcp_v6_is_v4_mapped(sk)) &&
  8816. + inet_sk(sk)->inet_saddr == event->addr.in.s_addr) {
  8817. + if (event->low_prio != tp->mptcp->low_prio) {
  8818. + tp->mptcp->send_mp_prio = 1;
  8819. + tp->mptcp->low_prio = event->low_prio;
  8820. +
  8821. + tcp_send_ack(sk);
  8822. + }
  8823. + }
  8824. +
  8825. + if (event->family == AF_INET6 &&
  8826. + sk->sk_family == AF_INET6 &&
  8827. + !ipv6_addr_equal(&inet6_sk(sk)->saddr, &event->addr.in6)) {
  8828. + if (event->low_prio != tp->mptcp->low_prio) {
  8829. + tp->mptcp->send_mp_prio = 1;
  8830. + tp->mptcp->low_prio = event->low_prio;
  8831. +
  8832. + tcp_send_ack(sk);
  8833. + }
  8834. + }
  8835. + }
  8836. + }
  8837. +next:
  8838. + bh_unlock_sock(meta_sk);
  8839. + sock_put(meta_sk);
  8840. + }
  8841. + rcu_read_unlock_bh();
  8842. + }
  8843. + goto next_event;
  8844. +}
  8845. +
  8846. +static struct mptcp_addr_event *lookup_similar_event(struct net *net,
  8847. + struct mptcp_addr_event *event)
  8848. +{
  8849. + struct mptcp_addr_event *eventq;
  8850. + struct mptcp_fm_ns *fm_ns = fm_get_ns(net);
  8851. +
  8852. + list_for_each_entry(eventq, &fm_ns->events, list) {
  8853. + if (eventq->family != event->family)
  8854. + continue;
  8855. + if (event->family == AF_INET) {
  8856. + if (eventq->addr.in.s_addr == event->addr.in.s_addr)
  8857. + return eventq;
  8858. + } else {
  8859. + if (ipv6_addr_equal(&eventq->addr.in6, &event->addr.in6))
  8860. + return eventq;
  8861. + }
  8862. + }
  8863. + return NULL;
  8864. +}
  8865. +
  8866. +/* We already hold the net-namespace MPTCP-lock */
  8867. +static void add_pm_event(struct net *net, struct mptcp_addr_event *event)
  8868. +{
  8869. + struct mptcp_addr_event *eventq = lookup_similar_event(net, event);
  8870. + struct mptcp_fm_ns *fm_ns = fm_get_ns(net);
  8871. +
  8872. + if (eventq) {
  8873. + switch (event->code) {
  8874. + case MPTCP_EVENT_DEL:
  8875. + list_del(&eventq->list);
  8876. + kfree(eventq);
  8877. + break;
  8878. + case MPTCP_EVENT_ADD:
  8879. + eventq->low_prio = event->low_prio;
  8880. + eventq->code = MPTCP_EVENT_ADD;
  8881. + return;
  8882. + case MPTCP_EVENT_MOD:
  8883. + eventq->low_prio = event->low_prio;
  8884. + return;
  8885. + }
  8886. + }
  8887. +
  8888. + /* OK, we have to add the new address to the wait queue */
  8889. + eventq = kmemdup(event, sizeof(struct mptcp_addr_event), GFP_ATOMIC);
  8890. + if (!eventq)
  8891. + return;
  8892. +
  8893. + list_add_tail(&eventq->list, &fm_ns->events);
  8894. +
  8895. + /* Queue the delayed address-worker, if it is not already pending */
  8896. + if (!delayed_work_pending(&fm_ns->address_worker))
  8897. + queue_delayed_work(mptcp_wq, &fm_ns->address_worker,
  8898. + msecs_to_jiffies(500));
  8899. +}
  8900. +
  8901. +static void addr4_event_handler(struct in_ifaddr *ifa, unsigned long event,
  8902. + struct net *net)
  8903. +{
  8904. + struct net_device *netdev = ifa->ifa_dev->dev;
  8905. + struct mptcp_fm_ns *fm_ns = fm_get_ns(net);
  8906. + struct mptcp_addr_event mpevent;
  8907. +
  8908. + if (ifa->ifa_scope > RT_SCOPE_LINK ||
  8909. + ipv4_is_loopback(ifa->ifa_local))
  8910. + return;
  8911. +
  8912. + spin_lock_bh(&fm_ns->local_lock);
  8913. +
  8914. + mpevent.family = AF_INET;
  8915. + mpevent.addr.in.s_addr = ifa->ifa_local;
  8916. + mpevent.low_prio = (netdev->flags & IFF_MPBACKUP) ? 1 : 0;
  8917. +
  8918. + if (event == NETDEV_DOWN || !netif_running(netdev) ||
  8919. + (netdev->flags & IFF_NOMULTIPATH))
  8920. + mpevent.code = MPTCP_EVENT_DEL;
  8921. + else if (event == NETDEV_UP)
  8922. + mpevent.code = MPTCP_EVENT_ADD;
  8923. + else if (event == NETDEV_CHANGE)
  8924. + mpevent.code = MPTCP_EVENT_MOD;
  8925. +
  8926. + add_pm_event(net, &mpevent);
  8927. +
  8928. + spin_unlock_bh(&fm_ns->local_lock);
  8929. + return;
  8930. +}
  8931. +
  8932. +/* React to IPv4 address add/remove events */
  8933. +static int mptcp_pm_inetaddr_event(struct notifier_block *this,
  8934. + unsigned long event, void *ptr)
  8935. +{
  8936. + struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
  8937. + struct net *net = dev_net(ifa->ifa_dev->dev);
  8938. +
  8939. + addr4_event_handler(ifa, event, net);
  8940. +
  8941. + return NOTIFY_DONE;
  8942. +}
  8943. +
  8944. +static struct notifier_block mptcp_pm_inetaddr_notifier = {
  8945. + .notifier_call = mptcp_pm_inetaddr_event,
  8946. +};
  8947. +
  8948. +#if IS_ENABLED(CONFIG_IPV6)
  8949. +
  8950. +/* IPV6-related address/interface watchers */
  8951. +struct mptcp_dad_data {
  8952. + struct timer_list timer;
  8953. + struct inet6_ifaddr *ifa;
  8954. +};
  8955. +
  8956. +static void dad_callback(unsigned long arg);
  8957. +static int inet6_addr_event(struct notifier_block *this,
  8958. + unsigned long event, void *ptr);
  8959. +
  8960. +static int ipv6_is_in_dad_state(struct inet6_ifaddr *ifa)
  8961. +{
  8962. + return ((ifa->flags & IFA_F_TENTATIVE) &&
  8963. + ifa->state == INET6_IFADDR_STATE_DAD);
  8964. +}
  8965. +
  8966. +static void dad_init_timer(struct mptcp_dad_data *data,
  8967. + struct inet6_ifaddr *ifa)
  8968. +{
  8969. + data->ifa = ifa;
  8970. + data->timer.data = (unsigned long)data;
  8971. + data->timer.function = dad_callback;
  8972. + if (ifa->idev->cnf.rtr_solicit_delay)
  8973. + data->timer.expires = jiffies + ifa->idev->cnf.rtr_solicit_delay;
  8974. + else
  8975. + data->timer.expires = jiffies + (HZ/10);
  8976. +}
  8977. +
  8978. +static void dad_callback(unsigned long arg)
  8979. +{
  8980. + struct mptcp_dad_data *data = (struct mptcp_dad_data *)arg;
  8981. +
  8982. + if (ipv6_is_in_dad_state(data->ifa)) {
  8983. + dad_init_timer(data, data->ifa);
  8984. + add_timer(&data->timer);
  8985. + } else {
  8986. + inet6_addr_event(NULL, NETDEV_UP, data->ifa);
  8987. + in6_ifa_put(data->ifa);
  8988. + kfree(data);
  8989. + }
  8990. +}
  8991. +
  8992. +static inline void dad_setup_timer(struct inet6_ifaddr *ifa)
  8993. +{
  8994. + struct mptcp_dad_data *data;
  8995. +
  8996. + data = kmalloc(sizeof(*data), GFP_ATOMIC);
  8997. +
  8998. + if (!data)
  8999. + return;
  9000. +
  9001. + init_timer(&data->timer);
  9002. + dad_init_timer(data, ifa);
  9003. + add_timer(&data->timer);
  9004. + in6_ifa_hold(ifa);
  9005. +}
  9006. +
  9007. +static void addr6_event_handler(struct inet6_ifaddr *ifa, unsigned long event,
  9008. + struct net *net)
  9009. +{
  9010. + struct net_device *netdev = ifa->idev->dev;
  9011. + int addr_type = ipv6_addr_type(&ifa->addr);
  9012. + struct mptcp_fm_ns *fm_ns = fm_get_ns(net);
  9013. + struct mptcp_addr_event mpevent;
  9014. +
  9015. + if (ifa->scope > RT_SCOPE_LINK ||
  9016. + addr_type == IPV6_ADDR_ANY ||
  9017. + (addr_type & IPV6_ADDR_LOOPBACK) ||
  9018. + (addr_type & IPV6_ADDR_LINKLOCAL))
  9019. + return;
  9020. +
  9021. + spin_lock_bh(&fm_ns->local_lock);
  9022. +
  9023. + mpevent.family = AF_INET6;
  9024. + mpevent.addr.in6 = ifa->addr;
  9025. + mpevent.low_prio = (netdev->flags & IFF_MPBACKUP) ? 1 : 0;
  9026. +
  9027. + if (event == NETDEV_DOWN || !netif_running(netdev) ||
  9028. + (netdev->flags & IFF_NOMULTIPATH))
  9029. + mpevent.code = MPTCP_EVENT_DEL;
  9030. + else if (event == NETDEV_UP)
  9031. + mpevent.code = MPTCP_EVENT_ADD;
  9032. + else if (event == NETDEV_CHANGE)
  9033. + mpevent.code = MPTCP_EVENT_MOD;
  9034. +
  9035. + add_pm_event(net, &mpevent);
  9036. +
  9037. + spin_unlock_bh(&fm_ns->local_lock);
  9038. + return;
  9039. +}
  9040. +
  9041. +/* React to IPv6 address add/remove events */
  9042. +static int inet6_addr_event(struct notifier_block *this, unsigned long event,
  9043. + void *ptr)
  9044. +{
  9045. + struct inet6_ifaddr *ifa6 = (struct inet6_ifaddr *)ptr;
  9046. + struct net *net = dev_net(ifa6->idev->dev);
  9047. +
  9048. + if (ipv6_is_in_dad_state(ifa6))
  9049. + dad_setup_timer(ifa6);
  9050. + else
  9051. + addr6_event_handler(ifa6, event, net);
  9052. +
  9053. + return NOTIFY_DONE;
  9054. +}
  9055. +
  9056. +static struct notifier_block inet6_addr_notifier = {
  9057. + .notifier_call = inet6_addr_event,
  9058. +};
  9059. +
  9060. +#endif
  9061. +
  9062. +/* React to ifup/ifdown events */
  9063. +static int netdev_event(struct notifier_block *this, unsigned long event,
  9064. + void *ptr)
  9065. +{
  9066. + struct net_device *dev = netdev_notifier_info_to_dev(ptr);
  9067. + struct in_device *in_dev;
  9068. +#if IS_ENABLED(CONFIG_IPV6)
  9069. + struct inet6_dev *in6_dev;
  9070. +#endif
  9071. +
  9072. + if (!(event == NETDEV_UP || event == NETDEV_DOWN ||
  9073. + event == NETDEV_CHANGE))
  9074. + return NOTIFY_DONE;
  9075. +
  9076. + rcu_read_lock();
  9077. + in_dev = __in_dev_get_rtnl(dev);
  9078. +
  9079. + if (in_dev) {
  9080. + for_ifa(in_dev) {
  9081. + mptcp_pm_inetaddr_event(NULL, event, ifa);
  9082. + } endfor_ifa(in_dev);
  9083. + }
  9084. +
  9085. +#if IS_ENABLED(CONFIG_IPV6)
  9086. + in6_dev = __in6_dev_get(dev);
  9087. +
  9088. + if (in6_dev) {
  9089. + struct inet6_ifaddr *ifa6;
  9090. + list_for_each_entry(ifa6, &in6_dev->addr_list, if_list)
  9091. + inet6_addr_event(NULL, event, ifa6);
  9092. + }
  9093. +#endif
  9094. +
  9095. + rcu_read_unlock();
  9096. + return NOTIFY_DONE;
  9097. +}
  9098. +
  9099. +static struct notifier_block mptcp_pm_netdev_notifier = {
  9100. + .notifier_call = netdev_event,
  9101. +};
  9102. +
  9103. +static void full_mesh_new_session(struct sock *meta_sk, int index)
  9104. +{
  9105. + struct mptcp_loc_addr *mptcp_local;
  9106. + struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
  9107. + struct fullmesh_priv *fmp = (struct fullmesh_priv *)&mpcb->mptcp_pm[0];
  9108. + struct net *net = sock_net(meta_sk);
  9109. + struct mptcp_fm_ns *fm_ns = fm_get_ns(net);
  9110. + struct sock *sk;
  9111. + int i;
  9112. +
  9113. + if (index == -1) {
  9114. + mptcp_fallback_default(mpcb);
  9115. + return;
  9116. + }
  9117. +
  9118. + /* Initialize workqueue-struct */
  9119. + INIT_WORK(&fmp->subflow_work, create_subflow_worker);
  9120. + INIT_DELAYED_WORK(&fmp->subflow_retry_work, retry_subflow_worker);
  9121. + fmp->mpcb = mpcb;
  9122. +
  9123. + sk = mptcp_select_ack_sock(meta_sk, 0);
  9124. +
  9125. + rcu_read_lock();
  9126. + mptcp_local = rcu_dereference(fm_ns->local);
  9127. +
  9128. + /* Look for the address among the local addresses */
  9129. + mptcp_for_each_bit_set(mptcp_local->loc4_bits, i) {
  9130. + __be32 ifa_address = mptcp_local->locaddr4[i].addr.s_addr;
  9131. +
  9132. + /* We do not need to announce the initial subflow's address again */
  9133. + if ((meta_sk->sk_family == AF_INET ||
  9134. + mptcp_v6_is_v4_mapped(meta_sk)) &&
  9135. + inet_sk(meta_sk)->inet_saddr == ifa_address)
  9136. + continue;
  9137. +
  9138. + fmp->add_addr++;
  9139. +
  9140. + if (sk)
  9141. + tcp_send_ack(sk);
  9142. + }
  9143. +
  9144. +#if IS_ENABLED(CONFIG_IPV6)
  9145. + mptcp_for_each_bit_set(mptcp_local->loc6_bits, i) {
  9146. + struct in6_addr *ifa6 = &mptcp_local->locaddr6[i].addr;
  9147. +
  9148. + /* We do not need to announce the initial subflow's address again */
  9149. + if (meta_sk->sk_family == AF_INET6 &&
  9150. + ipv6_addr_equal(&inet6_sk(meta_sk)->saddr, ifa6))
  9151. + continue;
  9152. +
  9153. + fmp->add_addr++;
  9154. +
  9155. + if (sk)
  9156. + tcp_send_ack(sk);
  9157. + }
  9158. +#endif
  9159. +
  9160. + rcu_read_unlock();
  9161. +
  9162. + if (meta_sk->sk_family == AF_INET || mptcp_v6_is_v4_mapped(meta_sk))
  9163. + fmp->announced_addrs_v4 |= (1 << index);
  9164. + else
  9165. + fmp->announced_addrs_v6 |= (1 << index);
  9166. +}
  9167. +
  9168. +static void full_mesh_create_subflows(struct sock *meta_sk)
  9169. +{
  9170. + struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
  9171. + struct fullmesh_priv *pm_priv = (struct fullmesh_priv *)&mpcb->mptcp_pm[0];
  9172. +
  9173. + if (mpcb->infinite_mapping_snd || mpcb->infinite_mapping_rcv ||
  9174. + mpcb->send_infinite_mapping ||
  9175. + mpcb->server_side || sock_flag(meta_sk, SOCK_DEAD))
  9176. + return;
  9177. +
  9178. + /* The master may not yet be fully established (address added through
  9179. + * mptcp_update_metasocket). In that case, we should not attempt to create new
  9180. + * subflows.
  9181. + */
  9182. + if (mpcb->master_sk &&
  9183. + !tcp_sk(mpcb->master_sk)->mptcp->fully_established)
  9184. + return;
  9185. +
  9186. + if (!work_pending(&pm_priv->subflow_work)) {
  9187. + sock_hold(meta_sk);
  9188. + queue_work(mptcp_wq, &pm_priv->subflow_work);
  9189. + }
  9190. +}
  9191. +
  9192. +/* Called upon release_sock, if the socket was owned by the user during
  9193. + * a path-management event.
  9194. + */
  9195. +static void full_mesh_release_sock(struct sock *meta_sk)
  9196. +{
  9197. + struct mptcp_loc_addr *mptcp_local;
  9198. + struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
  9199. + struct fullmesh_priv *fmp = (struct fullmesh_priv *)&mpcb->mptcp_pm[0];
  9200. + struct mptcp_fm_ns *fm_ns = fm_get_ns(sock_net(meta_sk));
  9201. + struct sock *sk, *tmpsk;
  9202. + int i;
  9203. +
  9204. + rcu_read_lock();
  9205. + mptcp_local = rcu_dereference(fm_ns->local);
  9206. +
  9207. + /* First, detect modifications or additions */
  9208. + mptcp_for_each_bit_set(mptcp_local->loc4_bits, i) {
  9209. + struct in_addr ifa = mptcp_local->locaddr4[i].addr;
  9210. + bool found = false;
  9211. +
  9212. + mptcp_for_each_sk(mpcb, sk) {
  9213. + struct tcp_sock *tp = tcp_sk(sk);
  9214. +
  9215. + if (sk->sk_family == AF_INET6 &&
  9216. + !mptcp_v6_is_v4_mapped(sk))
  9217. + continue;
  9218. +
  9219. + if (inet_sk(sk)->inet_saddr != ifa.s_addr)
  9220. + continue;
  9221. +
  9222. + found = true;
  9223. +
  9224. + if (mptcp_local->locaddr4[i].low_prio != tp->mptcp->low_prio) {
  9225. + tp->mptcp->send_mp_prio = 1;
  9226. + tp->mptcp->low_prio = mptcp_local->locaddr4[i].low_prio;
  9227. +
  9228. + tcp_send_ack(sk);
  9229. + }
  9230. + }
  9231. +
  9232. + if (!found) {
  9233. + fmp->add_addr++;
  9234. +
  9235. + sk = mptcp_select_ack_sock(meta_sk, 0);
  9236. + if (sk)
  9237. + tcp_send_ack(sk);
  9238. + full_mesh_create_subflows(meta_sk);
  9239. + }
  9240. + }
  9241. +
  9242. +#if IS_ENABLED(CONFIG_IPV6)
  9243. + mptcp_for_each_bit_set(mptcp_local->loc6_bits, i) {
  9244. + struct in6_addr ifa = mptcp_local->locaddr6[i].addr;
  9245. + bool found = false;
  9246. +
  9247. + mptcp_for_each_sk(mpcb, sk) {
  9248. + struct tcp_sock *tp = tcp_sk(sk);
  9249. +
  9250. + if (sk->sk_family == AF_INET ||
  9251. + mptcp_v6_is_v4_mapped(sk))
  9252. + continue;
  9253. +
  9254. + if (!ipv6_addr_equal(&inet6_sk(sk)->saddr, &ifa))
  9255. + continue;
  9256. +
  9257. + found = true;
  9258. +
  9259. + if (mptcp_local->locaddr6[i].low_prio != tp->mptcp->low_prio) {
  9260. + tp->mptcp->send_mp_prio = 1;
  9261. + tp->mptcp->low_prio = mptcp_local->locaddr6[i].low_prio;
  9262. +
  9263. + tcp_send_ack(sk);
  9264. + }
  9265. + }
  9266. +
  9267. + if (!found) {
  9268. + fmp->add_addr++;
  9269. +
  9270. + sk = mptcp_select_ack_sock(meta_sk, 0);
  9271. + if (sk)
  9272. + tcp_send_ack(sk);
  9273. + full_mesh_create_subflows(meta_sk);
  9274. + }
  9275. + }
  9276. +#endif
  9277. +
  9278. + /* Now, detect address-removals */
  9279. + mptcp_for_each_sk_safe(mpcb, sk, tmpsk) {
  9280. + bool shall_remove = true;
  9281. +
  9282. + if (sk->sk_family == AF_INET || mptcp_v6_is_v4_mapped(sk)) {
  9283. + mptcp_for_each_bit_set(mptcp_local->loc4_bits, i) {
  9284. + if (inet_sk(sk)->inet_saddr == mptcp_local->locaddr4[i].addr.s_addr) {
  9285. + shall_remove = false;
  9286. + break;
  9287. + }
  9288. + }
  9289. + } else {
  9290. + mptcp_for_each_bit_set(mptcp_local->loc6_bits, i) {
  9291. + if (ipv6_addr_equal(&inet6_sk(sk)->saddr, &mptcp_local->locaddr6[i].addr)) {
  9292. + shall_remove = false;
  9293. + break;
  9294. + }
  9295. + }
  9296. + }
  9297. +
  9298. + if (shall_remove) {
  9299. + /* Reinject, so that pf = 1 and so we
  9300. + * won't select this one as the
  9301. + * ack-sock.
  9302. + */
  9303. + mptcp_reinject_data(sk, 0);
  9304. +
  9305. + update_remove_addrs(tcp_sk(sk)->mptcp->loc_id, meta_sk,
  9306. + mptcp_local);
  9307. +
  9308. + if (mpcb->master_sk == sk)
  9309. + update_remove_addrs(0, meta_sk, mptcp_local);
  9310. +
  9311. + mptcp_sub_force_close(sk);
  9312. + }
  9313. + }
  9314. + rcu_read_unlock();
  9315. +}
  9316. +
  9317. +static int full_mesh_get_local_index(sa_family_t family, union inet_addr *addr,
  9318. + struct net *net)
  9319. +{
  9320. + struct mptcp_loc_addr *mptcp_local;
  9321. + struct mptcp_fm_ns *fm_ns = fm_get_ns(net);
  9322. + int index;
  9323. +
  9324. + /* Handle the backup-flows */
  9325. + rcu_read_lock();
  9326. + mptcp_local = rcu_dereference(fm_ns->local);
  9327. +
  9328. + index = mptcp_find_address(mptcp_local, family, addr);
  9329. +
  9330. + rcu_read_unlock();
  9331. +
  9332. + return index;
  9333. +}
  9334. +
  9335. +static int full_mesh_get_local_id(sa_family_t family, union inet_addr *addr,
  9336. + struct net *net)
  9337. +{
  9338. + struct mptcp_loc_addr *mptcp_local;
  9339. + struct mptcp_fm_ns *fm_ns = fm_get_ns(net);
  9340. + int index, id = -1;
  9341. +
  9342. + /* Handle the backup-flows */
  9343. + rcu_read_lock();
  9344. + mptcp_local = rcu_dereference(fm_ns->local);
  9345. +
  9346. + index = mptcp_find_address(mptcp_local, family, addr);
  9347. +
  9348. + if (index != -1) {
  9349. + if (family == AF_INET)
  9350. + id = mptcp_local->locaddr4[index].loc4_id;
  9351. + else
  9352. + id = mptcp_local->locaddr6[index].loc6_id;
  9353. + }
  9354. +
  9355. +
  9356. + rcu_read_unlock();
  9357. +
  9358. + return id;
  9359. +}
  9360. +
  9361. +static void full_mesh_addr_signal(struct sock *sk, unsigned *size,
  9362. + struct tcp_out_options *opts,
  9363. + struct sk_buff *skb)
  9364. +{
  9365. + struct tcp_sock *tp = tcp_sk(sk);
  9366. + struct mptcp_cb *mpcb = tp->mpcb;
  9367. + struct fullmesh_priv *fmp = (struct fullmesh_priv *)&mpcb->mptcp_pm[0];
  9368. + struct mptcp_loc_addr *mptcp_local;
  9369. + struct mptcp_fm_ns *fm_ns = fm_get_ns(sock_net(sk));
  9370. + int remove_addr_len;
  9371. + u8 unannouncedv4, unannouncedv6;
  9372. +
  9373. + if (likely(!fmp->add_addr))
  9374. + goto remove_addr;
  9375. +
  9376. + rcu_read_lock();
  9377. + mptcp_local = rcu_dereference(fm_ns->local);
  9378. +
  9379. + /* IPv4 */
  9380. + unannouncedv4 = (~fmp->announced_addrs_v4) & mptcp_local->loc4_bits;
  9381. + if (unannouncedv4 &&
  9382. + MAX_TCP_OPTION_SPACE - *size >= MPTCP_SUB_LEN_ADD_ADDR4_ALIGN) {
  9383. + int ind = mptcp_find_free_index(~unannouncedv4);
  9384. +
  9385. + opts->options |= OPTION_MPTCP;
  9386. + opts->mptcp_options |= OPTION_ADD_ADDR;
  9387. + opts->add_addr4.addr_id = mptcp_local->locaddr4[ind].loc4_id;
  9388. + opts->add_addr4.addr = mptcp_local->locaddr4[ind].addr;
  9389. + opts->add_addr_v4 = 1;
  9390. +
  9391. + if (skb) {
  9392. + fmp->announced_addrs_v4 |= (1 << ind);
  9393. + fmp->add_addr--;
  9394. + }
  9395. + *size += MPTCP_SUB_LEN_ADD_ADDR4_ALIGN;
  9396. + }
  9397. +
  9398. + /* IPv6 */
  9399. + unannouncedv6 = (~fmp->announced_addrs_v6) & mptcp_local->loc6_bits;
  9400. + if (unannouncedv6 &&
  9401. + MAX_TCP_OPTION_SPACE - *size >= MPTCP_SUB_LEN_ADD_ADDR6_ALIGN) {
  9402. + int ind = mptcp_find_free_index(~unannouncedv6);
  9403. +
  9404. + opts->options |= OPTION_MPTCP;
  9405. + opts->mptcp_options |= OPTION_ADD_ADDR;
  9406. + opts->add_addr6.addr_id = mptcp_local->locaddr6[ind].loc6_id;
  9407. + opts->add_addr6.addr = mptcp_local->locaddr6[ind].addr;
  9408. + opts->add_addr_v6 = 1;
  9409. +
  9410. + if (skb) {
  9411. + fmp->announced_addrs_v6 |= (1 << ind);
  9412. + fmp->add_addr--;
  9413. + }
  9414. + *size += MPTCP_SUB_LEN_ADD_ADDR6_ALIGN;
  9415. + }
  9416. +
  9417. + rcu_read_unlock();
  9418. +
  9419. + if (!unannouncedv4 && !unannouncedv6 && skb) {
  9420. + fmp->add_addr--;
  9421. + }
  9422. +
  9423. +remove_addr:
  9424. + if (likely(!fmp->remove_addrs))
  9425. + return;
  9426. +
  9427. + remove_addr_len = mptcp_sub_len_remove_addr_align(fmp->remove_addrs);
  9428. + if (MAX_TCP_OPTION_SPACE - *size < remove_addr_len)
  9429. + return;
  9430. +
  9431. + opts->options |= OPTION_MPTCP;
  9432. + opts->mptcp_options |= OPTION_REMOVE_ADDR;
  9433. + opts->remove_addrs = fmp->remove_addrs;
  9434. + *size += remove_addr_len;
  9435. + if (skb)
  9436. + fmp->remove_addrs = 0;
  9437. +}
  9438. +
  9439. +static int mptcp_fm_init_net(struct net *net)
  9440. +{
  9441. + struct mptcp_loc_addr *mptcp_local;
  9442. + struct mptcp_fm_ns *fm_ns;
  9443. +
  9444. + fm_ns = kzalloc(sizeof(*fm_ns), GFP_KERNEL);
  9445. + if (!fm_ns)
  9446. + return -ENOBUFS;
  9447. +
  9448. + mptcp_local = kzalloc(sizeof(*mptcp_local), GFP_KERNEL);
  9449. + if (!mptcp_local) {
  9450. + kfree(fm_ns);
  9451. + return -ENOBUFS;
  9452. + }
  9453. +
  9454. + mptcp_local->next_v4_index = 1;
  9455. +
  9456. + rcu_assign_pointer(fm_ns->local, mptcp_local);
  9457. + INIT_DELAYED_WORK(&fm_ns->address_worker, mptcp_address_worker);
  9458. + INIT_LIST_HEAD(&fm_ns->events);
  9459. + spin_lock_init(&fm_ns->local_lock);
  9460. + fm_ns->net = net;
  9461. + net->mptcp.path_managers[MPTCP_PM_FULLMESH] = fm_ns;
  9462. +
  9463. + return 0;
  9464. +}
  9465. +
  9466. +static void mptcp_fm_exit_net(struct net *net)
  9467. +{
  9468. + struct mptcp_addr_event *eventq, *tmp;
  9469. + struct mptcp_fm_ns *fm_ns;
  9470. + struct mptcp_loc_addr *mptcp_local;
  9471. +
  9472. + fm_ns = fm_get_ns(net);
  9473. + cancel_delayed_work_sync(&fm_ns->address_worker);
  9474. +
  9475. + rcu_read_lock_bh();
  9476. +
  9477. + mptcp_local = rcu_dereference_bh(fm_ns->local);
  9478. + kfree(mptcp_local);
  9479. +
  9480. + spin_lock(&fm_ns->local_lock);
  9481. + list_for_each_entry_safe(eventq, tmp, &fm_ns->events, list) {
  9482. + list_del(&eventq->list);
  9483. + kfree(eventq);
  9484. + }
  9485. + spin_unlock(&fm_ns->local_lock);
  9486. +
  9487. + rcu_read_unlock_bh();
  9488. +
  9489. + kfree(fm_ns);
  9490. +}
  9491. +
  9492. +static struct pernet_operations full_mesh_net_ops = {
  9493. + .init = mptcp_fm_init_net,
  9494. + .exit = mptcp_fm_exit_net,
  9495. +};
  9496. +
  9497. +static struct mptcp_pm_ops full_mesh __read_mostly = {
  9498. + .new_session = full_mesh_new_session,
  9499. + .release_sock = full_mesh_release_sock,
  9500. + .fully_established = full_mesh_create_subflows,
  9501. + .new_remote_address = full_mesh_create_subflows,
  9502. + .get_local_index = full_mesh_get_local_index,
  9503. + .get_local_id = full_mesh_get_local_id,
  9504. + .addr_signal = full_mesh_addr_signal,
  9505. + .name = "fullmesh",
  9506. + .owner = THIS_MODULE,
  9507. +};
  9508. +
  9509. +/* General initialization of MPTCP_PM */
  9510. +static int __init full_mesh_register(void)
  9511. +{
  9512. + int ret;
  9513. +
  9514. + BUILD_BUG_ON(sizeof(struct fullmesh_priv) > MPTCP_PM_SIZE);
  9515. +
  9516. + ret = register_pernet_subsys(&full_mesh_net_ops);
  9517. + if (ret)
  9518. + goto out;
  9519. +
  9520. + ret = register_inetaddr_notifier(&mptcp_pm_inetaddr_notifier);
  9521. + if (ret)
  9522. + goto err_reg_inetaddr;
  9523. + ret = register_netdevice_notifier(&mptcp_pm_netdev_notifier);
  9524. + if (ret)
  9525. + goto err_reg_netdev;
  9526. +
  9527. +#if IS_ENABLED(CONFIG_IPV6)
  9528. + ret = register_inet6addr_notifier(&inet6_addr_notifier);
  9529. + if (ret)
  9530. + goto err_reg_inet6addr;
  9531. +#endif
  9532. +
  9533. + ret = mptcp_register_path_manager(&full_mesh);
  9534. + if (ret)
  9535. + goto err_reg_pm;
  9536. +
  9537. +out:
  9538. + return ret;
  9539. +
  9540. +
  9541. +err_reg_pm:
  9542. +#if IS_ENABLED(CONFIG_IPV6)
  9543. + unregister_inet6addr_notifier(&inet6_addr_notifier);
  9544. +err_reg_inet6addr:
  9545. +#endif
  9546. + unregister_netdevice_notifier(&mptcp_pm_netdev_notifier);
  9547. +err_reg_netdev:
  9548. + unregister_inetaddr_notifier(&mptcp_pm_inetaddr_notifier);
  9549. +err_reg_inetaddr:
  9550. + unregister_pernet_subsys(&full_mesh_net_ops);
  9551. + goto out;
  9552. +}
  9553. +
  9554. +static void full_mesh_unregister(void)
  9555. +{
  9556. +#if IS_ENABLED(CONFIG_IPV6)
  9557. + unregister_inet6addr_notifier(&inet6_addr_notifier);
  9558. +#endif
  9559. + unregister_netdevice_notifier(&mptcp_pm_netdev_notifier);
  9560. + unregister_inetaddr_notifier(&mptcp_pm_inetaddr_notifier);
  9561. + unregister_pernet_subsys(&full_mesh_net_ops);
  9562. + mptcp_unregister_path_manager(&full_mesh);
  9563. +}
  9564. +
  9565. +module_init(full_mesh_register);
  9566. +module_exit(full_mesh_unregister);
  9567. +
  9568. +MODULE_AUTHOR("Christoph Paasch");
  9569. +MODULE_LICENSE("GPL");
  9570. +MODULE_DESCRIPTION("Full-Mesh MPTCP");
  9571. +MODULE_VERSION("0.88");
  9572. diff -Nur linux-3.14.45.orig/net/mptcp/mptcp_input.c linux-3.14.45/net/mptcp/mptcp_input.c
  9573. --- linux-3.14.45.orig/net/mptcp/mptcp_input.c 1970-01-01 01:00:00.000000000 +0100
  9574. +++ linux-3.14.45/net/mptcp/mptcp_input.c 2015-06-24 14:15:48.895862487 +0200
  9575. @@ -0,0 +1,2254 @@
  9576. +/*
  9577. + * MPTCP implementation - Receiving side
  9578. + *
  9579. + * Initial Design & Implementation:
  9580. + * Sébastien Barré <sebastien.barre@uclouvain.be>
  9581. + *
  9582. + * Current Maintainer & Author:
  9583. + * Christoph Paasch <christoph.paasch@uclouvain.be>
  9584. + *
  9585. + * Additional authors:
  9586. + * Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
  9587. + * Gregory Detal <gregory.detal@uclouvain.be>
  9588. + * Fabien Duchêne <fabien.duchene@uclouvain.be>
  9589. + * Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
  9590. + * Lavkesh Lahngir <lavkesh51@gmail.com>
  9591. + * Andreas Ripke <ripke@neclab.eu>
  9592. + * Vlad Dogaru <vlad.dogaru@intel.com>
  9593. + * Octavian Purdila <octavian.purdila@intel.com>
  9594. + * John Ronan <jronan@tssg.org>
  9595. + * Catalin Nicutar <catalin.nicutar@gmail.com>
  9596. + * Brandon Heller <brandonh@stanford.edu>
  9597. + *
  9598. + *
  9599. + * This program is free software; you can redistribute it and/or
  9600. + * modify it under the terms of the GNU General Public License
  9601. + * as published by the Free Software Foundation; either version
  9602. + * 2 of the License, or (at your option) any later version.
  9603. + */
  9604. +
  9605. +#include <asm/unaligned.h>
  9606. +
  9607. +#include <net/mptcp.h>
  9608. +#include <net/mptcp_v4.h>
  9609. +#include <net/mptcp_v6.h>
  9610. +
  9611. +#include <linux/kconfig.h>
  9612. +
  9613. +/* is seq1 < seq2 ? */
  9614. +static inline int before64(const u64 seq1, const u64 seq2)
  9615. +{
  9616. + return (s64)(seq1 - seq2) < 0;
  9617. +}
  9618. +
  9619. +/* is seq1 > seq2 ? */
  9620. +#define after64(seq1, seq2) before64(seq2, seq1)
  9621. +
  9622. +static inline void mptcp_become_fully_estab(struct sock *sk)
  9623. +{
  9624. + tcp_sk(sk)->mptcp->fully_established = 1;
  9625. +
  9626. + if (is_master_tp(tcp_sk(sk)) &&
  9627. + tcp_sk(sk)->mpcb->pm_ops->fully_established)
  9628. + tcp_sk(sk)->mpcb->pm_ops->fully_established(mptcp_meta_sk(sk));
  9629. +}
  9630. +
  9631. +/* Similar to tcp_tso_acked without any memory accounting */
  9632. +static inline int mptcp_tso_acked_reinject(struct sock *sk, struct sk_buff *skb)
  9633. +{
  9634. + struct tcp_sock *tp = tcp_sk(sk);
  9635. + u32 packets_acked, len;
  9636. +
  9637. + BUG_ON(!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una));
  9638. +
  9639. + packets_acked = tcp_skb_pcount(skb);
  9640. +
  9641. + if (skb_unclone(skb, GFP_ATOMIC))
  9642. + return 0;
  9643. +
  9644. + len = tp->snd_una - TCP_SKB_CB(skb)->seq;
  9645. + __pskb_trim_head(skb, len);
  9646. +
  9647. + TCP_SKB_CB(skb)->seq += len;
  9648. + skb->ip_summed = CHECKSUM_PARTIAL;
  9649. + skb->truesize -= len;
  9650. +
  9651. + /* Any change of skb->len requires recalculation of tso factor. */
  9652. + if (tcp_skb_pcount(skb) > 1)
  9653. + tcp_set_skb_tso_segs(sk, skb, tcp_skb_mss(skb));
  9654. + packets_acked -= tcp_skb_pcount(skb);
  9655. +
  9656. + if (packets_acked) {
  9657. + BUG_ON(tcp_skb_pcount(skb) == 0);
  9658. + BUG_ON(!before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq));
  9659. + }
  9660. +
  9661. + return packets_acked;
  9662. +}
  9663. +
  9664. +/**
  9665. + * Cleans the meta-socket retransmission queue and the reinject-queue.
  9666. + * @meta_sk must be the metasocket.
  9667. + */
  9668. +static void mptcp_clean_rtx_queue(struct sock *meta_sk, u32 prior_snd_una)
  9669. +{
  9670. + struct sk_buff *skb, *tmp;
  9671. + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
  9672. + struct mptcp_cb *mpcb = meta_tp->mpcb;
  9673. + bool acked = false;
  9674. + u32 acked_pcount;
  9675. +
  9676. + while ((skb = tcp_write_queue_head(meta_sk)) &&
  9677. + skb != tcp_send_head(meta_sk)) {
  9678. + bool fully_acked = true;
  9679. +
  9680. + if (before(meta_tp->snd_una, TCP_SKB_CB(skb)->end_seq)) {
  9681. + if (tcp_skb_pcount(skb) == 1 ||
  9682. + !after(meta_tp->snd_una, TCP_SKB_CB(skb)->seq))
  9683. + break;
  9684. +
  9685. + acked_pcount = tcp_tso_acked(meta_sk, skb);
  9686. + if (!acked_pcount)
  9687. + break;
  9688. +
  9689. + fully_acked = false;
  9690. + } else {
  9691. + acked_pcount = tcp_skb_pcount(skb);
  9692. + }
  9693. +
  9694. + acked = true;
  9695. + meta_tp->packets_out -= acked_pcount;
  9696. + meta_tp->retrans_stamp = 0;
  9697. +
  9698. + if (!fully_acked)
  9699. + break;
  9700. +
  9701. + tcp_unlink_write_queue(skb, meta_sk);
  9702. +
  9703. + if (mptcp_is_data_fin(skb)) {
  9704. + struct sock *sk_it;
  9705. +
  9706. + /* DATA_FIN has been acknowledged - now we can close
  9707. + * the subflows
  9708. + */
  9709. + mptcp_for_each_sk(mpcb, sk_it) {
  9710. + unsigned long delay = 0;
  9711. +
  9712. + /* If we are the passive closer, don't trigger
  9713. + * subflow-fin until the subflow has been finned
  9714. + * by the peer - thus we add a delay.
  9715. + */
  9716. + if (mpcb->passive_close &&
  9717. + sk_it->sk_state == TCP_ESTABLISHED)
  9718. + delay = inet_csk(sk_it)->icsk_rto << 3;
  9719. +
  9720. + mptcp_sub_close(sk_it, delay);
  9721. + }
  9722. + }
  9723. + sk_wmem_free_skb(meta_sk, skb);
  9724. + }
  9725. + /* Remove acknowledged data from the reinject queue */
  9726. + skb_queue_walk_safe(&mpcb->reinject_queue, skb, tmp) {
  9727. + if (before(meta_tp->snd_una, TCP_SKB_CB(skb)->end_seq)) {
  9728. + if (tcp_skb_pcount(skb) == 1 ||
  9729. + !after(meta_tp->snd_una, TCP_SKB_CB(skb)->seq))
  9730. + break;
  9731. +
  9732. + mptcp_tso_acked_reinject(meta_sk, skb);
  9733. + break;
  9734. + }
  9735. +
  9736. + __skb_unlink(skb, &mpcb->reinject_queue);
  9737. + __kfree_skb(skb);
  9738. + }
  9739. +
  9740. + if (likely(between(meta_tp->snd_up, prior_snd_una, meta_tp->snd_una)))
  9741. + meta_tp->snd_up = meta_tp->snd_una;
  9742. +
  9743. + if (acked) {
  9744. + tcp_rearm_rto(meta_sk);
  9745. + /* Normally this is done in tcp_try_undo_loss - but MPTCP
  9746. + * does not call this function.
  9747. + */
  9748. + inet_csk(meta_sk)->icsk_retransmits = 0;
  9749. + }
  9750. +}
  9751. +
  9752. +/* Inspired by tcp_rcv_state_process */
  9753. +static int mptcp_rcv_state_process(struct sock *meta_sk, struct sock *sk,
  9754. + const struct sk_buff *skb, u32 data_seq,
  9755. + u16 data_len)
  9756. +{
  9757. + struct tcp_sock *meta_tp = tcp_sk(meta_sk), *tp = tcp_sk(sk);
  9758. + struct tcphdr *th = tcp_hdr(skb);
  9759. +
  9760. + /* State-machine handling if the FIN has been enqueued and it has
  9761. + * been acked (snd_una == write_seq) - it is important that this
  9762. + * happens after sk_wmem_free_skb, because otherwise
  9763. + * sk_forward_alloc is wrong upon inet_csk_destroy_sock()
  9764. + */
  9765. + switch (meta_sk->sk_state) {
  9766. + case TCP_FIN_WAIT1:
  9767. + if (meta_tp->snd_una == meta_tp->write_seq) {
  9768. + struct dst_entry *dst = __sk_dst_get(meta_sk);
  9769. +
  9770. + tcp_set_state(meta_sk, TCP_FIN_WAIT2);
  9771. + meta_sk->sk_shutdown |= SEND_SHUTDOWN;
  9772. +
  9773. + dst = __sk_dst_get(sk);
  9774. + if (dst)
  9775. + dst_confirm(dst);
  9776. +
  9777. + if (!sock_flag(meta_sk, SOCK_DEAD)) {
  9778. + /* Wake up lingering close() */
  9779. + meta_sk->sk_state_change(meta_sk);
  9780. + } else {
  9781. + int tmo;
  9782. +
  9783. + if (meta_tp->linger2 < 0 ||
  9784. + (data_len &&
  9785. + after(data_seq + data_len - (mptcp_is_data_fin2(skb, tp) ? 1 : 0),
  9786. + meta_tp->rcv_nxt))) {
  9787. + mptcp_send_active_reset(meta_sk, GFP_ATOMIC);
  9788. + tcp_done(meta_sk);
  9789. + NET_INC_STATS_BH(sock_net(meta_sk), LINUX_MIB_TCPABORTONDATA);
  9790. + return 1;
  9791. + }
  9792. +
  9793. + tmo = tcp_fin_time(meta_sk);
  9794. + if (tmo > TCP_TIMEWAIT_LEN) {
  9795. + inet_csk_reset_keepalive_timer(meta_sk, tmo - TCP_TIMEWAIT_LEN);
  9796. + } else if (mptcp_is_data_fin2(skb, tp) ||
  9797. + sock_owned_by_user(meta_sk)) {
  9798. + /* Bad case. We could lose such FIN otherwise.
  9799. + * It is not a big problem, but it looks confusing
  9800. + * and not so rare event. We still can lose it now,
  9801. + * if it spins in bh_lock_sock(), but it is really
  9802. + * marginal case.
  9803. + */
  9804. + inet_csk_reset_keepalive_timer(meta_sk, tmo);
  9805. + } else {
  9806. + tcp_time_wait(meta_sk, TCP_FIN_WAIT2, tmo);
  9807. + }
  9808. + }
  9809. + }
  9810. + break;
  9811. + case TCP_CLOSING:
  9812. + case TCP_LAST_ACK:
  9813. + if (meta_tp->snd_una == meta_tp->write_seq) {
  9814. + tcp_done(meta_sk);
  9815. + return 1;
  9816. + }
  9817. + break;
  9818. + }
  9819. +
  9820. + /* step 7: process the segment text */
  9821. + switch (meta_sk->sk_state) {
  9822. + case TCP_FIN_WAIT1:
  9823. + case TCP_FIN_WAIT2:
  9824. + /* RFC 793 says to queue data in these states,
  9825. + * RFC 1122 says we MUST send a reset.
  9826. + * BSD 4.4 also does reset.
  9827. + */
  9828. + if (meta_sk->sk_shutdown & RCV_SHUTDOWN) {
  9829. + if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
  9830. + after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt) &&
  9831. + !mptcp_is_data_fin2(skb, tp)) {
  9832. + NET_INC_STATS_BH(sock_net(meta_sk), LINUX_MIB_TCPABORTONDATA);
  9833. + mptcp_send_active_reset(meta_sk, GFP_ATOMIC);
  9834. + tcp_reset(meta_sk);
  9835. + return 1;
  9836. + }
  9837. + }
  9838. + break;
  9839. + }
  9840. +
  9841. + return 0;
  9842. +}
  9843. +
  9844. +/**
  9845. + * @return:
  9846. + * i) 1: Everything's fine.
  9847. + * ii) -1: A reset has been sent on the subflow - csum-failure
  9848. + * iii) 0: csum-failure but no reset sent, because it's the last subflow.
  9849. + * The last packet should not be destroyed by the caller, because that
  9850. + * has already been done here.
  9851. + */
  9852. +static int mptcp_verif_dss_csum(struct sock *sk)
  9853. +{
  9854. + struct tcp_sock *tp = tcp_sk(sk);
  9855. + struct sk_buff *tmp, *tmp1, *last = NULL;
  9856. + __wsum csum_tcp = 0; /* cumulative checksum of pld + mptcp-header */
  9857. + int ans = 1, overflowed = 0, offset = 0, dss_csum_added = 0;
  9858. + int iter = 0;
  9859. +
  9860. + skb_queue_walk_safe(&sk->sk_receive_queue, tmp, tmp1) {
  9861. + unsigned int csum_len;
  9862. +
  9863. + if (before(tp->mptcp->map_subseq + tp->mptcp->map_data_len, TCP_SKB_CB(tmp)->end_seq))
  9864. + /* Mapping ends in the middle of the packet -
  9865. + * csum only these bytes
  9866. + */
  9867. + csum_len = tp->mptcp->map_subseq + tp->mptcp->map_data_len - TCP_SKB_CB(tmp)->seq;
  9868. + else
  9869. + csum_len = tmp->len;
  9870. +
  9871. + offset = 0;
  9872. + if (overflowed) {
  9873. + char first_word[4];
  9874. + first_word[0] = 0;
  9875. + first_word[1] = 0;
  9876. + first_word[2] = 0;
  9877. + first_word[3] = *(tmp->data);
  9878. + csum_tcp = csum_partial(first_word, 4, csum_tcp);
  9879. + offset = 1;
  9880. + csum_len--;
  9881. + overflowed = 0;
  9882. + }
  9883. +
  9884. + csum_tcp = skb_checksum(tmp, offset, csum_len, csum_tcp);
  9885. +
  9886. + /* Was the length odd? Then we have to merge the next byte
  9887. + * correctly (see above)
  9888. + */
  9889. + if (csum_len != (csum_len & (~1)))
  9890. + overflowed = 1;
  9891. +
  9892. + if (mptcp_is_data_seq(tmp) && !dss_csum_added) {
  9893. + __be32 data_seq = htonl((u32)(tp->mptcp->map_data_seq >> 32));
  9894. +
  9895. + /* If a 64-bit dss is present, we increase the offset
  9896. + * by 4 bytes, as the high-order 64-bits will be added
  9897. + * in the final csum_partial-call.
  9898. + */
  9899. + u32 offset = skb_transport_offset(tmp) +
  9900. + TCP_SKB_CB(tmp)->dss_off;
  9901. + if (TCP_SKB_CB(tmp)->mptcp_flags & MPTCPHDR_SEQ64_SET)
  9902. + offset += 4;
  9903. +
  9904. + csum_tcp = skb_checksum(tmp, offset,
  9905. + MPTCP_SUB_LEN_SEQ_CSUM,
  9906. + csum_tcp);
  9907. +
  9908. + csum_tcp = csum_partial(&data_seq,
  9909. + sizeof(data_seq), csum_tcp);
  9910. +
  9911. + dss_csum_added = 1; /* Just do it once */
  9912. + }
  9913. + last = tmp;
  9914. + iter++;
  9915. +
  9916. + if (!skb_queue_is_last(&sk->sk_receive_queue, tmp) &&
  9917. + !before(TCP_SKB_CB(tmp1)->seq,
  9918. + tp->mptcp->map_subseq + tp->mptcp->map_data_len))
  9919. + break;
  9920. + }
  9921. +
  9922. + /* Now, checksum must be 0 */
  9923. + if (unlikely(csum_fold(csum_tcp))) {
  9924. + pr_err("%s csum is wrong: %#x data_seq %u dss_csum_added %d overflowed %d iterations %d\n",
  9925. + __func__, csum_fold(csum_tcp),
  9926. + TCP_SKB_CB(last)->seq, dss_csum_added, overflowed,
  9927. + iter);
  9928. +
  9929. + tp->mptcp->send_mp_fail = 1;
  9930. +
  9931. + /* map_data_seq is the data-seq number of the
  9932. + * mapping we are currently checking
  9933. + */
  9934. + tp->mpcb->csum_cutoff_seq = tp->mptcp->map_data_seq;
  9935. +
  9936. + if (tp->mpcb->cnt_subflows > 1) {
  9937. + mptcp_send_reset(sk);
  9938. + ans = -1;
  9939. + } else {
  9940. + tp->mpcb->send_infinite_mapping = 1;
  9941. +
  9942. + /* Need to purge the rcv-queue as it is no longer valid */
  9943. + while ((tmp = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
  9944. + tp->copied_seq = TCP_SKB_CB(tmp)->end_seq;
  9945. + kfree_skb(tmp);
  9946. + }
  9947. +
  9948. + ans = 0;
  9949. + }
  9950. + }
  9951. +
  9952. + return ans;
  9953. +}
  9954. +
  9955. +static inline void mptcp_prepare_skb(struct sk_buff *skb, struct sk_buff *next,
  9956. + struct sock *sk)
  9957. +{
  9958. + struct tcp_sock *tp = tcp_sk(sk);
  9959. + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
  9960. + /* Adapt the data-seqs to the packet itself. We effectively transform
  9961. + * the dss-mapping to a per-packet granularity. This is necessary to
  9962. + * correctly handle overlapping mappings coming from different
  9963. + * subflows. Otherwise it would be a complete mess.
  9964. + */
  9965. + tcb->seq = ((u32)tp->mptcp->map_data_seq) + tcb->seq - tp->mptcp->map_subseq;
  9966. + tcb->end_seq = tcb->seq + skb->len;
  9967. +
  9968. + /* If cur is the last one in the rcv-queue (or the last one for this
  9969. + * mapping), and data_fin is enqueued, the end_data_seq is +1.
  9970. + */
  9971. + if (skb_queue_is_last(&sk->sk_receive_queue, skb) ||
  9972. + after(TCP_SKB_CB(next)->end_seq, tp->mptcp->map_subseq + tp->mptcp->map_data_len)) {
  9973. + tcb->end_seq += tp->mptcp->map_data_fin;
  9974. +
  9975. + /* We manually set the fin-flag if it is a data-fin, for easy
  9976. + * processing in tcp_recvmsg.
  9977. + */
  9978. + if (mptcp_is_data_fin2(skb, tp))
  9979. + tcp_hdr(skb)->fin = 1;
  9980. + else
  9981. + tcp_hdr(skb)->fin = 0;
  9982. + } else {
  9983. + /* We may have a subflow-fin with data but without data-fin */
  9984. + tcp_hdr(skb)->fin = 0;
  9985. + }
  9986. +}
  9987. +
  9988. +/**
  9989. + * @return: 1 if the segment has been eaten and can be suppressed,
  9990. + * otherwise 0.
  9991. + */
  9992. +static inline int mptcp_direct_copy(struct sk_buff *skb, struct sock *meta_sk)
  9993. +{
  9994. + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
  9995. + int chunk = min_t(unsigned int, skb->len, meta_tp->ucopy.len);
  9996. + int eaten = 0;
  9997. +
  9998. + __set_current_state(TASK_RUNNING);
  9999. +
  10000. + local_bh_enable();
  10001. + if (!skb_copy_datagram_iovec(skb, 0, meta_tp->ucopy.iov, chunk)) {
  10002. + meta_tp->ucopy.len -= chunk;
  10003. + meta_tp->copied_seq += chunk;
  10004. + eaten = (chunk == skb->len);
  10005. + tcp_rcv_space_adjust(meta_sk);
  10006. + }
  10007. + local_bh_disable();
  10008. + return eaten;
  10009. +}
  10010. +
  10011. +static inline void mptcp_reset_mapping(struct tcp_sock *tp)
  10012. +{
  10013. + tp->mptcp->map_data_len = 0;
  10014. + tp->mptcp->map_data_seq = 0;
  10015. + tp->mptcp->map_subseq = 0;
  10016. + tp->mptcp->map_data_fin = 0;
  10017. + tp->mptcp->mapping_present = 0;
  10018. +}
  10019. +
  10020. +/* The DSS-mapping received on the sk only covers the second half of the skb
  10021. + * (cut at seq). We trim the head from the skb.
  10022. + * Data will be freed upon kfree().
  10023. + *
  10024. + * Inspired by tcp_trim_head().
  10025. + */
  10026. +static void mptcp_skb_trim_head(struct sk_buff *skb, struct sock *sk, u32 seq)
  10027. +{
  10028. + int len = seq - TCP_SKB_CB(skb)->seq;
  10029. + u32 new_seq = TCP_SKB_CB(skb)->seq + len;
  10030. +
  10031. + if (len < skb_headlen(skb))
  10032. + __skb_pull(skb, len);
  10033. + else
  10034. + __pskb_trim_head(skb, len - skb_headlen(skb));
  10035. +
  10036. + TCP_SKB_CB(skb)->seq = new_seq;
  10037. +
  10038. + skb->truesize -= len;
  10039. + atomic_sub(len, &sk->sk_rmem_alloc);
  10040. + sk_mem_uncharge(sk, len);
  10041. +}
  10042. +
  10043. +/* The DSS-mapping received on the sk only covers the first half of the skb
  10044. + * (cut at seq). We create a second skb (@return), and queue it in the rcv-queue
  10045. + * as further packets may resolve the mapping of the second half of data.
  10046. + *
  10047. + * Inspired by tcp_fragment().
  10048. + */
  10049. +static int mptcp_skb_split_tail(struct sk_buff *skb, struct sock *sk, u32 seq)
  10050. +{
  10051. + struct sk_buff *buff;
  10052. + int nsize;
  10053. + int nlen, len;
  10054. +
  10055. + len = seq - TCP_SKB_CB(skb)->seq;
  10056. + nsize = skb_headlen(skb) - len + tcp_sk(sk)->tcp_header_len;
  10057. + if (nsize < 0)
  10058. + nsize = 0;
  10059. +
  10060. + /* Get a new skb... force flag on. */
  10061. + buff = alloc_skb(nsize, GFP_ATOMIC);
  10062. + if (buff == NULL)
  10063. + return -ENOMEM;
  10064. +
  10065. + skb_reserve(buff, tcp_sk(sk)->tcp_header_len);
  10066. + skb_reset_transport_header(buff);
  10067. +
  10068. + tcp_hdr(buff)->fin = tcp_hdr(skb)->fin;
  10069. + tcp_hdr(skb)->fin = 0;
  10070. +
  10071. + /* We absolutely need to call skb_set_owner_r before refreshing the
  10072. + * truesize of buff, otherwise the moved data will be accounted twice.
  10073. + */
  10074. + skb_set_owner_r(buff, sk);
  10075. + nlen = skb->len - len - nsize;
  10076. + buff->truesize += nlen;
  10077. + skb->truesize -= nlen;
  10078. +
  10079. + /* Correct the sequence numbers. */
  10080. + TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
  10081. + TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
  10082. + TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
  10083. +
  10084. + skb_split(skb, buff, len);
  10085. +
  10086. + __skb_queue_after(&sk->sk_receive_queue, skb, buff);
  10087. +
  10088. + return 0;
  10089. +}
  10090. +
  10091. +/* @return: 0 everything is fine - just continue processing
  10092. + * 1 the subflow is broken - stop everything
  10093. + * -1 this packet was broken - continue with the next one.
  10094. + */
  10095. +static int mptcp_prevalidate_skb(struct sock *sk, struct sk_buff *skb)
  10096. +{
  10097. + struct tcp_sock *tp = tcp_sk(sk);
  10098. +
  10099. + /* If we are in infinite mode, the subflow-fin is in fact a data-fin. */
  10100. + if (!skb->len && tcp_hdr(skb)->fin && !mptcp_is_data_fin(skb) &&
  10101. + !tp->mpcb->infinite_mapping_rcv) {
  10102. + /* Remove a pure subflow-fin from the queue and increase
  10103. + * copied_seq.
  10104. + */
  10105. + tp->copied_seq = TCP_SKB_CB(skb)->end_seq;
  10106. + __skb_unlink(skb, &sk->sk_receive_queue);
  10107. + __kfree_skb(skb);
  10108. + return -1;
  10109. + }
  10110. +
  10111. + /* If we are not yet fully established and do not know the mapping for
  10112. + * this segment, this path has to fall back to the infinite mapping or be torn down.
  10113. + */
  10114. + if (!tp->mptcp->fully_established && !mptcp_is_data_seq(skb) &&
  10115. + !tp->mptcp->mapping_present && !tp->mpcb->infinite_mapping_rcv) {
  10116. + pr_err("%s %#x will fallback - pi %d from %pS, seq %u\n",
  10117. + __func__, tp->mpcb->mptcp_loc_token,
  10118. + tp->mptcp->path_index, __builtin_return_address(0),
  10119. + TCP_SKB_CB(skb)->seq);
  10120. +
  10121. + if (!is_master_tp(tp)) {
  10122. + mptcp_send_reset(sk);
  10123. + return 1;
  10124. + }
  10125. +
  10126. + tp->mpcb->infinite_mapping_snd = 1;
  10127. + tp->mpcb->infinite_mapping_rcv = 1;
  10128. + tp->mptcp->fully_established = 1;
  10129. + }
  10130. +
  10131. + /* Receiver-side becomes fully established when a whole rcv-window has
  10132. + * been received without the need to fall back due to the previous
  10133. + * condition. */
  10134. + if (!tp->mptcp->fully_established) {
  10135. + tp->mptcp->init_rcv_wnd -= skb->len;
  10136. + if (tp->mptcp->init_rcv_wnd < 0)
  10137. + mptcp_become_fully_estab(sk);
  10138. + }
  10139. +
  10140. + return 0;
  10141. +}
  10142. +
  10143. +/* @return: 0 everything is fine - just continue processing
  10144. + * 1 the subflow is broken - stop everything
  10145. + * -1 this packet was broken - continue with the next one.
  10146. + */
  10147. +static int mptcp_detect_mapping(struct sock *sk, struct sk_buff *skb)
  10148. +{
  10149. + struct tcp_sock *tp = tcp_sk(sk), *meta_tp = mptcp_meta_tp(tp);
  10150. + struct mptcp_cb *mpcb = tp->mpcb;
  10151. + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
  10152. + u32 *ptr;
  10153. + u32 data_seq, sub_seq, data_len, tcp_end_seq;
  10154. +
  10155. + /* If we are in infinite-mapping-mode, the subflow is guaranteed to be
  10156. + * in-order at the data-level. Thus data-seq-numbers can be inferred
  10157. + * from what is expected at the data-level.
  10158. + */
  10159. + if (mpcb->infinite_mapping_rcv) {
  10160. + tp->mptcp->map_data_seq = mptcp_get_rcv_nxt_64(meta_tp);
  10161. + tp->mptcp->map_subseq = tcb->seq;
  10162. + tp->mptcp->map_data_len = skb->len;
  10163. + tp->mptcp->map_data_fin = tcp_hdr(skb)->fin;
  10164. + tp->mptcp->mapping_present = 1;
  10165. + return 0;
  10166. + }
  10167. +
  10168. + /* No mapping here? Exit - it is either already set or still on its way */
  10169. + if (!mptcp_is_data_seq(skb)) {
  10170. + /* Too many packets without a mapping - this subflow is broken */
  10171. + if (!tp->mptcp->mapping_present &&
  10172. + tp->rcv_nxt - tp->copied_seq > 65536) {
  10173. + mptcp_send_reset(sk);
  10174. + return 1;
  10175. + }
  10176. +
  10177. + return 0;
  10178. + }
  10179. +
  10180. + ptr = mptcp_skb_set_data_seq(skb, &data_seq, mpcb);
  10181. + ptr++;
  10182. + sub_seq = get_unaligned_be32(ptr) + tp->mptcp->rcv_isn;
  10183. + ptr++;
  10184. + data_len = get_unaligned_be16(ptr);
  10185. +
  10186. + /* If it's an empty skb with DATA_FIN, sub_seq must get fixed.
  10187. + * The draft sets it to 0, but we would really like to have the
  10188. + * real value, to simplify the handling later in this
  10189. + * function.
  10190. + */
  10191. + if (mptcp_is_data_fin(skb) && skb->len == 0)
  10192. + sub_seq = TCP_SKB_CB(skb)->seq;
  10193. +
  10194. + /* If there is already a mapping - we check if it maps with the current
  10195. + * one. If not - we reset.
  10196. + */
  10197. + if (tp->mptcp->mapping_present &&
  10198. + (data_seq != (u32)tp->mptcp->map_data_seq ||
  10199. + sub_seq != tp->mptcp->map_subseq ||
  10200. + data_len != tp->mptcp->map_data_len + tp->mptcp->map_data_fin ||
  10201. + mptcp_is_data_fin(skb) != tp->mptcp->map_data_fin)) {
  10202. + /* Mapping in packet is different from what we want */
  10203. + pr_err("%s Mappings do not match!\n", __func__);
  10204. + pr_err("%s dseq %u mdseq %u, sseq %u msseq %u dlen %u mdlen %u dfin %d mdfin %d\n",
  10205. + __func__, data_seq, (u32)tp->mptcp->map_data_seq,
  10206. + sub_seq, tp->mptcp->map_subseq, data_len,
  10207. + tp->mptcp->map_data_len, mptcp_is_data_fin(skb),
  10208. + tp->mptcp->map_data_fin);
  10209. + mptcp_send_reset(sk);
  10210. + return 1;
  10211. + }
  10212. +
  10213. + /* If the previous check was good, the current mapping is valid and we exit. */
  10214. + if (tp->mptcp->mapping_present)
  10215. + return 0;
  10216. +
  10217. + /* Mapping not yet set on this subflow - we set it here! */
  10218. +
  10219. + if (!data_len) {
  10220. + mpcb->infinite_mapping_rcv = 1;
  10221. + tp->mptcp->fully_established = 1;
  10222. + /* We need to repeat mp_fail's until the sender fell
  10223. + * back to infinite-mapping - here we stop repeating it.
  10224. + */
  10225. + tp->mptcp->send_mp_fail = 0;
  10226. +
  10227. + /* We have to fixup data_len - it must be the same as skb->len */
  10228. + data_len = skb->len + (mptcp_is_data_fin(skb) ? 1 : 0);
  10229. + sub_seq = tcb->seq;
  10230. +
  10231. + /* TODO kill all other subflows than this one */
  10232. + /* data_seq and so on are set correctly */
  10233. +
  10234. + /* At this point, the meta-ofo-queue has to be emptied,
  10235. + * as the following data is guaranteed to be in-order at
  10236. + * the data and subflow-level
  10237. + */
  10238. + mptcp_purge_ofo_queue(meta_tp);
  10239. + }
  10240. +
  10241. + /* We are sending mp-fail's and thus are in fallback mode.
  10242. + * Ignore packets which do not announce the fallback and still
  10243. + * want to provide a mapping.
  10244. + */
  10245. + if (tp->mptcp->send_mp_fail) {
  10246. + tp->copied_seq = TCP_SKB_CB(skb)->end_seq;
  10247. + __skb_unlink(skb, &sk->sk_receive_queue);
  10248. + __kfree_skb(skb);
  10249. + return -1;
  10250. + }
  10251. +
  10252. + /* FIN increased the mapping-length by 1 */
  10253. + if (mptcp_is_data_fin(skb))
  10254. + data_len--;
  10255. +
  10256. + /* The packet's subflow-sequences must
  10257. + * (at least partially) be part of the DSS-mapping's
  10258. + * subflow-sequence-space.
  10259. + *
  10260. + * Basically the mapping is not valid, if either of the
  10261. + * following conditions is true:
  10262. + *
  10263. + * 1. It's not a data_fin and
  10264. + * MPTCP-sub_seq >= TCP-end_seq
  10265. + *
  10266. + * 2. It's a data_fin and TCP-end_seq > TCP-seq and
  10267. + * MPTCP-sub_seq >= TCP-end_seq
  10268. + *
  10269. + * The previous two can be merged into:
  10270. + * TCP-end_seq > TCP-seq and MPTCP-sub_seq >= TCP-end_seq
  10271. + * Because if it's not a data-fin, TCP-end_seq > TCP-seq
  10272. + *
  10273. + * 3. It's a data_fin and skb->len == 0 and
  10274. + * MPTCP-sub_seq > TCP-end_seq
  10275. + *
  10276. + * 4. It's not a data_fin and TCP-end_seq > TCP-seq and
  10277. + * MPTCP-sub_seq + MPTCP-data_len <= TCP-seq
  10278. + *
  10279. + * 5. MPTCP-sub_seq is prior to what we already copied (copied_seq)
  10280. + */
  10281. +
  10282. + /* subflow-fin is not part of the mapping - ignore it here! */
  10283. + tcp_end_seq = tcb->end_seq - tcp_hdr(skb)->fin;
  10284. + if ((!before(sub_seq, tcb->end_seq) && after(tcp_end_seq, tcb->seq)) ||
  10285. + (mptcp_is_data_fin(skb) && skb->len == 0 && after(sub_seq, tcb->end_seq)) ||
  10286. + (!after(sub_seq + data_len, tcb->seq) && after(tcp_end_seq, tcb->seq)) ||
  10287. + before(sub_seq, tp->copied_seq)) {
  10288. + /* The packet's subflow-sequences do not match the
  10289. + * packet's dss-mapping. The peer is misbehaving - reset
  10290. + */
  10291. + pr_err("%s Packet's mapping does not map to the DSS sub_seq %u "
  10292. + "end_seq %u, tcp_end_seq %u seq %u dfin %u len %u data_len %u "
  10293. + "copied_seq %u\n", __func__, sub_seq, tcb->end_seq, tcp_end_seq, tcb->seq, mptcp_is_data_fin(skb),
  10294. + skb->len, data_len, tp->copied_seq);
  10295. + mptcp_send_reset(sk);
  10296. + return 1;
  10297. + }
  10298. +
  10299. + /* Did the DSS use 64-bit sequence numbers? */
  10300. + if (!(tcb->mptcp_flags & MPTCPHDR_SEQ64_SET)) {
  10301. + /* Wrapped around? */
  10302. + if (unlikely(after(data_seq, meta_tp->rcv_nxt) && data_seq < meta_tp->rcv_nxt)) {
  10303. + tp->mptcp->map_data_seq = mptcp_get_data_seq_64(mpcb, !mpcb->rcv_hiseq_index, data_seq);
  10304. + } else {
  10305. + /* Else, access the default high-order bits */
  10306. + tp->mptcp->map_data_seq = mptcp_get_data_seq_64(mpcb, mpcb->rcv_hiseq_index, data_seq);
  10307. + }
  10308. + } else {
  10309. + tp->mptcp->map_data_seq = mptcp_get_data_seq_64(mpcb, (tcb->mptcp_flags & MPTCPHDR_SEQ64_INDEX) ? 1 : 0, data_seq);
  10310. +
  10311. + if (unlikely(tcb->mptcp_flags & MPTCPHDR_SEQ64_OFO)) {
  10312. + /* We make sure that the data_seq is invalid: adding 0xFFFFFFFF twice
  10313. + * pushes it ~2^33 beyond the receive window, so it is dropped later.
  10314. + */
  10315. + tp->mptcp->map_data_seq += 0xFFFFFFFF;
  10316. + tp->mptcp->map_data_seq += 0xFFFFFFFF;
  10317. + }
  10318. + }
  10319. +
  10320. + tp->mptcp->map_data_len = data_len;
  10321. + tp->mptcp->map_subseq = sub_seq;
  10322. + tp->mptcp->map_data_fin = mptcp_is_data_fin(skb) ? 1 : 0;
  10323. + tp->mptcp->mapping_present = 1;
  10324. +
  10325. + return 0;
  10326. +}
  10327. +
  10328. +/* Similar to tcp_sequence(...) */
  10329. +static inline int mptcp_sequence(const struct tcp_sock *meta_tp,
  10330. + u64 data_seq, u64 end_data_seq)
  10331. +{
  10332. + struct mptcp_cb *mpcb = meta_tp->mpcb;
  10333. + u64 rcv_wup64;
  10334. +
  10335. + /* Wrap-around? */
  10336. + if (meta_tp->rcv_wup > meta_tp->rcv_nxt) {
  10337. + rcv_wup64 = ((u64)(mpcb->rcv_high_order[mpcb->rcv_hiseq_index] - 1) << 32) |
  10338. + meta_tp->rcv_wup;
  10339. + } else {
  10340. + rcv_wup64 = mptcp_get_data_seq_64(mpcb, mpcb->rcv_hiseq_index,
  10341. + meta_tp->rcv_wup);
  10342. + }
  10343. +
  10344. + return !before64(end_data_seq, rcv_wup64) &&
  10345. + !after64(data_seq, mptcp_get_rcv_nxt_64(meta_tp) + tcp_receive_window(meta_tp));
  10346. +}
  10347. +
  10348. +/* @return: 0 everything is fine. Just continue processing
  10349. + * -1 this packet was broken - continue with the next one.
  10350. + */
  10351. +static int mptcp_validate_mapping(struct sock *sk, struct sk_buff *skb)
  10352. +{
  10353. + struct tcp_sock *tp = tcp_sk(sk);
  10354. + struct sk_buff *tmp, *tmp1;
  10355. + u32 tcp_end_seq;
  10356. +
  10357. + if (!tp->mptcp->mapping_present)
  10358. + return 0;
  10359. +
  10360. + /* Either the new skb gave us the mapping, and the first segment
  10361. + * in the subflow receive-queue has to be trimmed at the head ...
  10362. + */
  10363. + tmp = skb_peek(&sk->sk_receive_queue);
  10364. + if (before(TCP_SKB_CB(tmp)->seq, tp->mptcp->map_subseq) &&
  10365. + after(TCP_SKB_CB(tmp)->end_seq, tp->mptcp->map_subseq))
  10366. + mptcp_skb_trim_head(tmp, sk, tp->mptcp->map_subseq);
  10367. +
  10368. + /* ... or the new skb (tail) has to be split at the end. */
  10369. + tcp_end_seq = TCP_SKB_CB(skb)->end_seq - (tcp_hdr(skb)->fin ? 1 : 0);
  10370. + if (after(tcp_end_seq, tp->mptcp->map_subseq + tp->mptcp->map_data_len)) {
  10371. + u32 seq = tp->mptcp->map_subseq + tp->mptcp->map_data_len;
  10372. + if (mptcp_skb_split_tail(skb, sk, seq)) { /* Allocation failed */
  10373. + /* TODO: maybe handle this better here.
  10374. + * For now we just force a meta-level retransmission.
  10375. + */
  10376. + tp->copied_seq = TCP_SKB_CB(skb)->end_seq;
  10377. + __skb_unlink(skb, &sk->sk_receive_queue);
  10378. + __kfree_skb(skb);
  10379. + return -1;
  10380. + }
  10381. + }
  10382. +
  10383. + /* Now, remove old sk_buff's from the receive-queue.
  10384. + * This may happen if the mapping has been lost for these segments and
  10385. + * the next mapping has already been received.
  10386. + */
  10387. + if (tp->mptcp->mapping_present &&
  10388. + before(TCP_SKB_CB(skb_peek(&sk->sk_receive_queue))->seq, tp->mptcp->map_subseq)) {
  10389. + skb_queue_walk_safe(&sk->sk_receive_queue, tmp1, tmp) {
  10390. + if (!before(TCP_SKB_CB(tmp1)->seq, tp->mptcp->map_subseq))
  10391. + break;
  10392. +
  10393. + tp->copied_seq = TCP_SKB_CB(tmp1)->end_seq;
  10394. + __skb_unlink(tmp1, &sk->sk_receive_queue);
  10395. +
  10396. + /* Impossible that we could free skb here, because its
  10397. + * mapping is known to be valid from previous checks
  10398. + */
  10399. + __kfree_skb(tmp1);
  10400. + }
  10401. + }
  10402. +
  10403. + return 0;
  10404. +}
  10405. +
  10406. +/* @return: 0 everything is fine. Just continue processing
  10407. + * 1 subflow is broken stop everything
  10408. + * -1 this mapping has been put in the meta-receive-queue
  10409. + * -2 this mapping has been eaten by the application
  10410. + */
  10411. +static int mptcp_queue_skb(struct sock *sk)
  10412. +{
  10413. + struct tcp_sock *tp = tcp_sk(sk), *meta_tp = mptcp_meta_tp(tp);
  10414. + struct sock *meta_sk = mptcp_meta_sk(sk);
  10415. + struct mptcp_cb *mpcb = tp->mpcb;
  10416. + struct sk_buff *tmp, *tmp1;
  10417. + u64 rcv_nxt64 = mptcp_get_rcv_nxt_64(meta_tp);
  10418. + bool data_queued = false;
  10419. +
  10420. + /* Have we not yet received the full mapping? */
  10421. + if (!tp->mptcp->mapping_present ||
  10422. + before(tp->rcv_nxt, tp->mptcp->map_subseq + tp->mptcp->map_data_len))
  10423. + return 0;
  10424. +
  10425. + /* Is this an overlapping mapping? rcv_nxt >= end_data_seq
  10426. + * OR
  10427. + * This mapping is out of window
  10428. + */
  10429. + if (!before64(rcv_nxt64, tp->mptcp->map_data_seq + tp->mptcp->map_data_len + tp->mptcp->map_data_fin) ||
  10430. + !mptcp_sequence(meta_tp, tp->mptcp->map_data_seq,
  10431. + tp->mptcp->map_data_seq + tp->mptcp->map_data_len + tp->mptcp->map_data_fin)) {
  10432. + skb_queue_walk_safe(&sk->sk_receive_queue, tmp1, tmp) {
  10433. + __skb_unlink(tmp1, &sk->sk_receive_queue);
  10434. + tp->copied_seq = TCP_SKB_CB(tmp1)->end_seq;
  10435. + __kfree_skb(tmp1);
  10436. +
  10437. + if (!skb_queue_empty(&sk->sk_receive_queue) &&
  10438. + !before(TCP_SKB_CB(tmp)->seq,
  10439. + tp->mptcp->map_subseq + tp->mptcp->map_data_len))
  10440. + break;
  10441. + }
  10442. +
  10443. + mptcp_reset_mapping(tp);
  10444. +
  10445. + return -1;
  10446. + }
  10447. +
  10448. + /* Record it, because we want to send our data_fin on the same path */
  10449. + if (tp->mptcp->map_data_fin) {
  10450. + mpcb->dfin_path_index = tp->mptcp->path_index;
  10451. + mpcb->dfin_combined = !!(sk->sk_shutdown & RCV_SHUTDOWN);
  10452. + }
  10453. +
  10454. + /* Verify the checksum */
  10455. + if (mpcb->dss_csum && !mpcb->infinite_mapping_rcv) {
  10456. + int ret = mptcp_verif_dss_csum(sk);
  10457. +
  10458. + if (ret <= 0) {
  10459. + mptcp_reset_mapping(tp);
  10460. + return 1;
  10461. + }
  10462. + }
  10463. +
  10464. + if (before64(rcv_nxt64, tp->mptcp->map_data_seq)) {
  10465. + /* Segments have to go to the meta-ofo-queue */
  10466. + skb_queue_walk_safe(&sk->sk_receive_queue, tmp1, tmp) {
  10467. + tp->copied_seq = TCP_SKB_CB(tmp1)->end_seq;
  10468. + mptcp_prepare_skb(tmp1, tmp, sk);
  10469. + __skb_unlink(tmp1, &sk->sk_receive_queue);
  10470. + /* MUST be done here, because fragstolen may be true later.
  10471. + * Then, kfree_skb_partial would not account for the memory.
  10472. + */
  10473. + skb_orphan(tmp1);
  10474. +
  10475. + if (!mpcb->in_time_wait) /* In time-wait, do not receive data */
  10476. + mptcp_add_meta_ofo_queue(meta_sk, tmp1, sk);
  10477. + else
  10478. + __kfree_skb(tmp1);
  10479. +
  10480. + if (!skb_queue_empty(&sk->sk_receive_queue) &&
  10481. + !before(TCP_SKB_CB(tmp)->seq,
  10482. + tp->mptcp->map_subseq + tp->mptcp->map_data_len))
  10483. + break;
  10484. +
  10485. + }
  10486. + } else {
  10487. + /* Ready for the meta-rcv-queue */
  10488. + skb_queue_walk_safe(&sk->sk_receive_queue, tmp1, tmp) {
  10489. + int eaten = 0;
  10490. + int copied_early = 0;
  10491. + bool fragstolen = false;
  10492. + u32 old_rcv_nxt = meta_tp->rcv_nxt;
  10493. +
  10494. + tp->copied_seq = TCP_SKB_CB(tmp1)->end_seq;
  10495. + mptcp_prepare_skb(tmp1, tmp, sk);
  10496. + __skb_unlink(tmp1, &sk->sk_receive_queue);
  10497. + /* MUST be done here, because fragstolen may be true.
  10498. + * Then, kfree_skb_partial would not account for the memory.
  10499. + */
  10500. + skb_orphan(tmp1);
  10501. +
  10502. + /* This segment has already been received */
  10503. + if (!after(TCP_SKB_CB(tmp1)->end_seq, meta_tp->rcv_nxt)) {
  10504. + __kfree_skb(tmp1);
  10505. + goto next;
  10506. + }
  10507. +
  10508. +#ifdef CONFIG_NET_DMA
  10509. + if (TCP_SKB_CB(tmp1)->seq == meta_tp->rcv_nxt &&
  10510. + meta_tp->ucopy.task == current &&
  10511. + meta_tp->copied_seq == meta_tp->rcv_nxt &&
  10512. + tmp1->len <= meta_tp->ucopy.len &&
  10513. + sock_owned_by_user(meta_sk) &&
  10514. + tcp_dma_try_early_copy(meta_sk, tmp1, 0)) {
  10515. + copied_early = 1;
  10516. + eaten = 1;
  10517. + }
  10518. +#endif
  10519. +
  10520. + /* Is direct copy possible? */
  10521. + if (TCP_SKB_CB(tmp1)->seq == meta_tp->rcv_nxt &&
  10522. + meta_tp->ucopy.task == current &&
  10523. + meta_tp->copied_seq == meta_tp->rcv_nxt &&
  10524. + meta_tp->ucopy.len && sock_owned_by_user(meta_sk) &&
  10525. + !copied_early)
  10526. + eaten = mptcp_direct_copy(tmp1, meta_sk);
  10527. +
  10528. + if (mpcb->in_time_wait) /* In time-wait, do not receive data */
  10529. + eaten = 1;
  10530. +
  10531. + if (!eaten)
  10532. + eaten = tcp_queue_rcv(meta_sk, tmp1, 0, &fragstolen);
  10533. +
  10534. + meta_tp->rcv_nxt = TCP_SKB_CB(tmp1)->end_seq;
  10535. + mptcp_check_rcvseq_wrap(meta_tp, old_rcv_nxt);
  10536. +
  10537. + if (copied_early)
  10538. + tcp_cleanup_rbuf(meta_sk, tmp1->len);
  10539. +
  10540. + if (tcp_hdr(tmp1)->fin && !mpcb->in_time_wait)
  10541. + mptcp_fin(meta_sk);
  10542. +
  10543. + /* Check if this fills a gap in the ofo queue */
  10544. + if (!skb_queue_empty(&meta_tp->out_of_order_queue))
  10545. + mptcp_ofo_queue(meta_sk);
  10546. +
  10547. +#ifdef CONFIG_NET_DMA
  10548. + if (copied_early)
  10549. + __skb_queue_tail(&meta_sk->sk_async_wait_queue,
  10550. + tmp1);
  10551. + else
  10552. +#endif
  10553. + if (eaten)
  10554. + kfree_skb_partial(tmp1, fragstolen);
  10555. +
  10556. + data_queued = true;
  10557. +next:
  10558. + if (!skb_queue_empty(&sk->sk_receive_queue) &&
  10559. + !before(TCP_SKB_CB(tmp)->seq,
  10560. + tp->mptcp->map_subseq + tp->mptcp->map_data_len))
  10561. + break;
  10562. + }
  10563. + }
  10564. +
  10565. + inet_csk(meta_sk)->icsk_ack.lrcvtime = tcp_time_stamp;
  10566. + tp->mptcp->last_data_seq = tp->mptcp->map_data_seq;
  10567. + mptcp_reset_mapping(tp);
  10568. +
  10569. + return data_queued ? -1 : -2;
  10570. +}
  10571. +
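+/* Walk the subflow's receive-queue: detect the DSS-mapping of each segment
+ * (if not yet known), validate it and push complete mappings up to the
+ * meta-socket, waking the application if new data got queued.
+ */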
  10572. +void mptcp_data_ready(struct sock *sk, int bytes)
  10573. +{
  10574. + struct sock *meta_sk = mptcp_meta_sk(sk);
  10575. + struct sk_buff *skb, *tmp;
  10576. + int queued = 0;
  10577. +
  10578. + /* If the meta is already closed, there is no point in pushing data */
  10579. + if (meta_sk->sk_state == TCP_CLOSE && !tcp_sk(sk)->mpcb->in_time_wait) {
  10580. + skb_queue_purge(&sk->sk_receive_queue);
  10581. + tcp_sk(sk)->copied_seq = tcp_sk(sk)->rcv_nxt;
  10582. + goto exit;
  10583. + }
  10584. +
  10585. +restart:
  10586. + /* Iterate over all segments, detect their mapping (if we don't have
  10587. + * one yet), validate them and push everything one level higher.
  10588. + */
  10589. + skb_queue_walk_safe(&sk->sk_receive_queue, skb, tmp) {
  10590. + int ret;
  10591. + /* Pre-validation - e.g., early fallback */
  10592. + ret = mptcp_prevalidate_skb(sk, skb);
  10593. + if (ret < 0)
  10594. + goto restart;
  10595. + else if (ret > 0)
  10596. + break;
  10597. +
  10598. + /* Set the current mapping */
  10599. + ret = mptcp_detect_mapping(sk, skb);
  10600. + if (ret < 0)
  10601. + goto restart;
  10602. + else if (ret > 0)
  10603. + break;
  10604. +
  10605. + /* Validation */
  10606. + if (mptcp_validate_mapping(sk, skb) < 0)
  10607. + goto restart;
  10608. +
  10609. + /* Push a level higher */
  10610. + ret = mptcp_queue_skb(sk);
  10611. + if (ret < 0) {
  10612. + if (ret == -1)
  10613. + queued = ret;
  10614. + goto restart;
  10615. + } else if (ret == 0) {
  10616. + continue;
  10617. + } else { /* ret == 1 */
  10618. + break;
  10619. + }
  10620. + }
  10621. +
  10622. +exit:
  10623. + if (tcp_sk(sk)->close_it) {
  10624. + tcp_send_ack(sk);
  10625. + tcp_time_wait(sk, TCP_TIME_WAIT, 0);
  10626. + }
  10627. +
  10628. + if (queued == -1 && !sock_flag(meta_sk, SOCK_DEAD))
  10629. + meta_sk->sk_data_ready(meta_sk, 0);
  10630. +}
  10631. +
  10632. +
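+/* A packet that belongs to a pending MP_JOIN request has no (sub)socket of
+ * its own yet - look up the meta-socket owning the request and process the
+ * skb in its context.
+ */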
  10633. +int mptcp_check_req(struct sk_buff *skb, struct net *net)
  10634. +{
  10635. + struct tcphdr *th = tcp_hdr(skb);
  10636. + struct sock *meta_sk = NULL;
  10637. +
  10638. + /* MPTCP structures not initialized */
  10639. + if (mptcp_init_failed)
  10640. + return 0;
  10641. +
  10642. + if (skb->protocol == htons(ETH_P_IP))
  10643. + meta_sk = mptcp_v4_search_req(th->source, ip_hdr(skb)->saddr,
  10644. + ip_hdr(skb)->daddr, net);
  10645. +#if IS_ENABLED(CONFIG_IPV6)
  10646. + else /* IPv6 */
  10647. + meta_sk = mptcp_v6_search_req(th->source, &ipv6_hdr(skb)->saddr,
  10648. + &ipv6_hdr(skb)->daddr, net);
  10649. +#endif /* CONFIG_IPV6 */
  10650. +
  10651. + if (!meta_sk)
  10652. + return 0;
  10653. +
  10654. + TCP_SKB_CB(skb)->mptcp_flags = MPTCPHDR_JOIN;
  10655. +
  10656. + bh_lock_sock_nested(meta_sk);
  10657. + if (sock_owned_by_user(meta_sk)) {
  10658. + skb->sk = meta_sk;
  10659. + if (unlikely(sk_add_backlog(meta_sk, skb,
  10660. + meta_sk->sk_rcvbuf + meta_sk->sk_sndbuf))) {
  10661. + bh_unlock_sock(meta_sk);
  10662. + NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
  10663. + sock_put(meta_sk); /* Taken by mptcp_vX_search_req */
  10664. + kfree_skb(skb);
  10665. + return 1;
  10666. + }
  10667. + } else if (skb->protocol == htons(ETH_P_IP)) {
  10668. + tcp_v4_do_rcv(meta_sk, skb);
  10669. +#if IS_ENABLED(CONFIG_IPV6)
  10670. + } else { /* IPv6 */
  10671. + tcp_v6_do_rcv(meta_sk, skb);
  10672. +#endif /* CONFIG_IPV6 */
  10673. + }
  10674. + bh_unlock_sock(meta_sk);
  10675. + sock_put(meta_sk); /* Taken by mptcp_vX_search_req */
  10676. + return 1;
  10677. +}
  10678. +
  10679. +struct mp_join *mptcp_find_join(struct sk_buff *skb)
  10680. +{
  10681. + struct tcphdr *th = tcp_hdr(skb);
  10682. + unsigned char *ptr;
  10683. + int length = (th->doff * 4) - sizeof(struct tcphdr);
  10684. +
  10685. + /* Jump through the options to check whether JOIN is there */
  10686. + ptr = (unsigned char *)(th + 1);
  10687. + while (length > 0) {
  10688. + int opcode = *ptr++;
  10689. + int opsize;
  10690. +
  10691. + switch (opcode) {
  10692. + case TCPOPT_EOL:
  10693. + return NULL;
  10694. + case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
  10695. + length--;
  10696. + continue;
  10697. + default:
  10698. + opsize = *ptr++;
  10699. + if (opsize < 2) /* "silly options" */
  10700. + return NULL;
  10701. + if (opsize > length)
  10702. + return NULL; /* don't parse partial options */
  10703. + if (opcode == TCPOPT_MPTCP &&
  10704. + ((struct mptcp_option *)(ptr - 2))->sub == MPTCP_SUB_JOIN) {
  10705. + return (struct mp_join *)(ptr - 2);
  10706. + }
  10707. + ptr += opsize - 2;
  10708. + length -= opsize;
  10709. + }
  10710. + }
  10711. + return NULL;
  10712. +}
  10713. +
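+/* A SYN carrying MP_JOIN addresses an existing MPTCP connection: look up the
+ * meta-socket via the token in the option and let it handle the new
+ * subflow's handshake.
+ */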
  10714. +int mptcp_lookup_join(struct sk_buff *skb, struct inet_timewait_sock *tw)
  10715. +{
  10716. + struct mptcp_cb *mpcb;
  10717. + struct sock *meta_sk;
  10718. + u32 token;
  10719. + struct mp_join *join_opt = mptcp_find_join(skb);
  10720. + if (!join_opt)
  10721. + return 0;
  10722. +
  10723. + /* MPTCP structures were not initialized, so return error */
  10724. + if (mptcp_init_failed)
  10725. + return -1;
  10726. +
  10727. + token = join_opt->u.syn.token;
  10728. + meta_sk = mptcp_hash_find(dev_net(skb_dst(skb)->dev), token);
  10729. + if (!meta_sk) {
  10730. + mptcp_debug("%s:mpcb not found:%x\n", __func__, token);
  10731. + return -1;
  10732. + }
  10733. +
  10734. + mpcb = tcp_sk(meta_sk)->mpcb;
  10735. + if (mpcb->infinite_mapping_rcv || mpcb->send_infinite_mapping) {
  10736. + /* We are in fallback-mode on the reception-side -
  10737. + * no new subflows!
  10738. + */
  10739. + sock_put(meta_sk); /* Taken by mptcp_hash_find */
  10740. + return -1;
  10741. + }
  10742. +
  10743. + /* Coming from time-wait-sock processing in tcp_v4_rcv.
  10744. + * We have to deschedule it before continuing, because otherwise
  10745. + * mptcp_v4_do_rcv would find it again inside tcp_v4_hnd_req.
  10746. + */
  10747. + if (tw) {
  10748. + inet_twsk_deschedule(tw, &tcp_death_row);
  10749. + inet_twsk_put(tw);
  10750. + }
  10751. +
  10752. + TCP_SKB_CB(skb)->mptcp_flags = MPTCPHDR_JOIN;
  10753. + /* OK, this is a new syn/join, let's create a new open request and
  10754. + * send syn+ack
  10755. + */
  10756. + bh_lock_sock_nested(meta_sk);
  10757. + if (sock_owned_by_user(meta_sk)) {
  10758. + skb->sk = meta_sk;
  10759. + if (unlikely(sk_add_backlog(meta_sk, skb,
  10760. + meta_sk->sk_rcvbuf + meta_sk->sk_sndbuf))) {
  10761. + bh_unlock_sock(meta_sk);
  10762. + NET_INC_STATS_BH(sock_net(meta_sk),
  10763. + LINUX_MIB_TCPBACKLOGDROP);
  10764. + sock_put(meta_sk); /* Taken by mptcp_hash_find */
  10765. + kfree_skb(skb);
  10766. + return 1;
  10767. + }
  10768. + } else if (skb->protocol == htons(ETH_P_IP)) {
  10769. + tcp_v4_do_rcv(meta_sk, skb);
  10770. +#if IS_ENABLED(CONFIG_IPV6)
  10771. + } else {
  10772. + tcp_v6_do_rcv(meta_sk, skb);
  10773. +#endif /* CONFIG_IPV6 */
  10774. + }
  10775. + bh_unlock_sock(meta_sk);
  10776. + sock_put(meta_sk); /* Taken by mptcp_hash_find */
  10777. + return 1;
  10778. +}
  10779. +
  10780. +int mptcp_do_join_short(struct sk_buff *skb, struct mptcp_options_received *mopt,
  10781. + struct tcp_options_received *tmp_opt, struct net *net)
  10782. +{
  10783. + struct sock *meta_sk;
  10784. + u32 token;
  10785. +
  10786. + token = mopt->mptcp_rem_token;
  10787. + meta_sk = mptcp_hash_find(net, token);
  10788. + if (!meta_sk) {
  10789. + mptcp_debug("%s:mpcb not found:%x\n", __func__, token);
  10790. + return -1;
  10791. + }
  10792. +
  10793. + TCP_SKB_CB(skb)->mptcp_flags = MPTCPHDR_JOIN;
  10794. +
  10795. + /* OK, this is a new syn/join, let's create a new open request and
  10796. + * send syn+ack
  10797. + */
  10798. + bh_lock_sock(meta_sk);
  10799. +
  10800. + /* This check is also done in mptcp_vX_do_rcv. But, there we cannot
  10801. + * call tcp_vX_send_reset, because we already hold two socket locks.
  10802. + * (the listener and the meta from above)
  10803. + *
  10804. + * And the send-reset will try to take yet another one (ip_send_reply).
  10805. + * Thus, we propagate the reset up to tcp_rcv_state_process.
  10806. + */
  10807. + if (tcp_sk(meta_sk)->mpcb->infinite_mapping_rcv ||
  10808. + tcp_sk(meta_sk)->mpcb->send_infinite_mapping ||
  10809. + meta_sk->sk_state == TCP_CLOSE || !tcp_sk(meta_sk)->inside_tk_table) {
  10810. + bh_unlock_sock(meta_sk);
  10811. + sock_put(meta_sk); /* Taken by mptcp_hash_find */
  10812. + return -1;
  10813. + }
  10814. +
  10815. + if (sock_owned_by_user(meta_sk)) {
  10816. + skb->sk = meta_sk;
  10817. + if (unlikely(sk_add_backlog(meta_sk, skb,
  10818. + meta_sk->sk_rcvbuf + meta_sk->sk_sndbuf)))
  10819. + NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
  10820. + else
  10821. + /* Must make sure that upper layers won't free the
  10822. + * skb if it is added to the backlog-queue.
  10823. + */
  10824. + skb_get(skb);
  10825. + } else {
  10826. + /* mptcp_v4_do_rcv tries to free the skb - we prevent this, as
  10827. + * the skb will finally be freed by tcp_v4_do_rcv (where we are
  10828. + * coming from)
  10829. + */
  10830. + skb_get(skb);
  10831. + if (skb->protocol == htons(ETH_P_IP)) {
  10832. + tcp_v4_do_rcv(meta_sk, skb);
  10833. +#if IS_ENABLED(CONFIG_IPV6)
  10834. + } else { /* IPv6 */
  10835. + tcp_v6_do_rcv(meta_sk, skb);
  10836. +#endif /* CONFIG_IPV6 */
  10837. + }
  10838. + }
  10839. +
  10840. + bh_unlock_sock(meta_sk);
  10841. + sock_put(meta_sk); /* Taken by mptcp_hash_find */
  10842. + return 0;
  10843. +}
  10844. +
  10845. +/**
  10846. + * Equivalent of tcp_fin() for MPTCP
  10847. + * Can be called only when the FIN is validly part of the data seqnum
  10848. + * space - not earlier, i.e. not while there are still holes before it.
  10849. + */
  10850. +void mptcp_fin(struct sock *meta_sk)
  10851. +{
  10852. + struct sock *sk = NULL, *sk_it;
  10853. + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
  10854. + struct mptcp_cb *mpcb = meta_tp->mpcb;
  10855. +
  10856. + mptcp_for_each_sk(mpcb, sk_it) {
  10857. + if (tcp_sk(sk_it)->mptcp->path_index == mpcb->dfin_path_index) {
  10858. + sk = sk_it;
  10859. + break;
  10860. + }
  10861. + }
  10862. +
  10863. + if (!sk || sk->sk_state == TCP_CLOSE)
  10864. + sk = mptcp_select_ack_sock(meta_sk, 0);
  10865. +
  10866. + inet_csk_schedule_ack(sk);
  10867. +
  10868. + meta_sk->sk_shutdown |= RCV_SHUTDOWN;
  10869. + sock_set_flag(meta_sk, SOCK_DONE);
  10870. +
  10871. + switch (meta_sk->sk_state) {
  10872. + case TCP_SYN_RECV:
  10873. + case TCP_ESTABLISHED:
  10874. + /* Move to CLOSE_WAIT */
  10875. + tcp_set_state(meta_sk, TCP_CLOSE_WAIT);
  10876. + inet_csk(sk)->icsk_ack.pingpong = 1;
  10877. + break;
  10878. +
  10879. + case TCP_CLOSE_WAIT:
  10880. + case TCP_CLOSING:
  10881. + /* Received a retransmission of the FIN, do
  10882. + * nothing.
  10883. + */
  10884. + break;
  10885. + case TCP_LAST_ACK:
  10886. + /* RFC793: Remain in the LAST-ACK state. */
  10887. + break;
  10888. +
  10889. + case TCP_FIN_WAIT1:
  10890. + /* This case occurs during a simultaneous close:
  10891. + * we must ACK the received FIN and
  10892. + * enter the CLOSING state.
  10893. + */
  10894. + tcp_send_ack(sk);
  10895. + tcp_set_state(meta_sk, TCP_CLOSING);
  10896. + break;
  10897. + case TCP_FIN_WAIT2:
  10898. + /* Received a FIN -- send ACK and enter TIME_WAIT. */
  10899. + tcp_send_ack(sk);
  10900. + tcp_time_wait(meta_sk, TCP_TIME_WAIT, 0);
  10901. + break;
  10902. + default:
  10903. + /* Only TCP_LISTEN and TCP_CLOSE are left, in these
  10904. + * cases we should never reach this piece of code.
  10905. + */
  10906. + pr_err("%s: Impossible, meta_sk->sk_state=%d\n", __func__,
  10907. + meta_sk->sk_state);
  10908. + break;
  10909. + }
  10910. +
  10911. + /* It _is_ possible that we have something out-of-order _after_ FIN.
  10912. + * Probably, we should reset in this case. For now, drop it.
  10913. + */
  10914. + mptcp_purge_ofo_queue(meta_tp);
  10915. + sk_mem_reclaim(meta_sk);
  10916. +
  10917. + if (!sock_flag(meta_sk, SOCK_DEAD)) {
  10918. + meta_sk->sk_state_change(meta_sk);
  10919. +
  10920. + /* Do not send POLL_HUP for half duplex close. */
  10921. + if (meta_sk->sk_shutdown == SHUTDOWN_MASK ||
  10922. + meta_sk->sk_state == TCP_CLOSE)
  10923. + sk_wake_async(meta_sk, SOCK_WAKE_WAITD, POLL_HUP);
  10924. + else
  10925. + sk_wake_async(meta_sk, SOCK_WAKE_WAITD, POLL_IN);
  10926. + }
  10927. +
  10928. + return;
  10929. +}
  10930. +
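+/* Retransmit the whole meta-level write-queue; used when a DATA_ACK arrives
+ * while the meta is in loss-state.
+ */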
  10931. +static void mptcp_xmit_retransmit_queue(struct sock *meta_sk)
  10932. +{
  10933. + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
  10934. + struct sk_buff *skb;
  10935. +
  10936. + if (!meta_tp->packets_out)
  10937. + return;
  10938. +
  10939. + tcp_for_write_queue(skb, meta_sk) {
  10940. + if (skb == tcp_send_head(meta_sk))
  10941. + break;
  10942. +
  10943. + if (mptcp_retransmit_skb(meta_sk, skb))
  10944. + return;
  10945. +
  10946. + if (skb == tcp_write_queue_head(meta_sk))
  10947. + inet_csk_reset_xmit_timer(meta_sk, ICSK_TIME_RETRANS,
  10948. + inet_csk(meta_sk)->icsk_rto,
  10949. + TCP_RTO_MAX);
  10950. + }
  10951. +}
  10952. +
  10953. +/* Handle the DATA_ACK */
  10954. +static void mptcp_data_ack(struct sock *sk, const struct sk_buff *skb)
  10955. +{
  10956. + struct sock *meta_sk = mptcp_meta_sk(sk);
  10957. + struct tcp_sock *meta_tp = tcp_sk(meta_sk), *tp = tcp_sk(sk);
  10958. + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
  10959. + u32 prior_snd_una = meta_tp->snd_una;
  10960. + int prior_packets;
  10961. + u32 nwin, data_ack, data_seq;
  10962. + u16 data_len = 0;
  10963. +
  10964. + /* A valid packet came in - subflow is operational again */
  10965. + tp->pf = 0;
  10966. +
  10967. + /* Even if there is no data-ack, we stop retransmitting.
  10968. + * Except if this is a SYN/ACK. Then it is just a retransmission
  10969. + */
  10970. + if (tp->mptcp->pre_established && !tcp_hdr(skb)->syn) {
  10971. + tp->mptcp->pre_established = 0;
  10972. + sk_stop_timer(sk, &tp->mptcp->mptcp_ack_timer);
  10973. + }
  10974. +
  10975. + /* If we are in infinite mapping mode, rx_opt.data_ack has been
  10976. + * set by mptcp_clean_rtx_infinite.
  10977. + */
  10978. + if (!(tcb->mptcp_flags & MPTCPHDR_ACK) && !tp->mpcb->infinite_mapping_snd)
  10979. + goto exit;
  10980. +
  10981. + data_ack = tp->mptcp->rx_opt.data_ack;
  10982. +
  10983. + if (unlikely(!tp->mptcp->fully_established) &&
  10984. + (data_ack != meta_tp->mptcp->snt_isn ||
  10985. + tp->mptcp->snt_isn + 1 != TCP_SKB_CB(skb)->ack_seq))
  10986. + /* As soon as data has been data-acked,
  10987. + * or a subflow-data-ack (not acking syn - thus snt_isn + 1)
  10988. + * includes a data-ack, we are fully established
  10989. + */
  10990. + mptcp_become_fully_estab(sk);
  10991. +
  10992. + /* Get the data_seq */
  10993. + if (mptcp_is_data_seq(skb)) {
  10994. + data_seq = tp->mptcp->rx_opt.data_seq;
  10995. + data_len = tp->mptcp->rx_opt.data_len;
  10996. + } else {
  10997. + data_seq = meta_tp->snd_wl1;
  10998. + }
  10999. +
  11000. + /* If the ack is older than previous acks
  11001. + * then we can probably ignore it.
  11002. + */
  11003. + if (before(data_ack, prior_snd_una))
  11004. + goto exit;
  11005. +
  11006. + /* If the ack includes data we haven't sent yet, discard
  11007. + * this segment (RFC793 Section 3.9).
  11008. + */
  11009. + if (after(data_ack, meta_tp->snd_nxt))
  11010. + goto exit;
  11011. +
  11012. + /*** Now, update the window - inspired by tcp_ack_update_window ***/
  11013. + nwin = ntohs(tcp_hdr(skb)->window);
  11014. +
  11015. + if (likely(!tcp_hdr(skb)->syn))
  11016. + nwin <<= tp->rx_opt.snd_wscale;
  11017. +
  11018. + if (tcp_may_update_window(meta_tp, data_ack, data_seq, nwin)) {
  11019. + tcp_update_wl(meta_tp, data_seq);
  11020. +
  11021. + /* Draft v09, Section 3.3.5:
  11022. + * [...] It should only update its local receive window values
  11023. + * when the largest sequence number allowed (i.e. DATA_ACK +
  11024. + * receive window) increases. [...]
  11025. + */
  11026. + if (meta_tp->snd_wnd != nwin &&
  11027. + !before(data_ack + nwin, tcp_wnd_end(meta_tp))) {
  11028. + meta_tp->snd_wnd = nwin;
  11029. +
  11030. + if (nwin > meta_tp->max_window)
  11031. + meta_tp->max_window = nwin;
  11032. + }
  11033. + }
  11034. + /*** Done, update the window ***/
  11035. +
  11036. + /* We passed data and got it acked, remove any soft error
  11037. + * log. Something worked...
  11038. + */
  11039. + sk->sk_err_soft = 0;
  11040. + inet_csk(meta_sk)->icsk_probes_out = 0;
  11041. + meta_tp->rcv_tstamp = tcp_time_stamp;
  11042. + prior_packets = meta_tp->packets_out;
  11043. + if (!prior_packets)
  11044. + goto no_queue;
  11045. +
  11046. + meta_tp->snd_una = data_ack;
  11047. +
  11048. + mptcp_clean_rtx_queue(meta_sk, prior_snd_una);
  11049. +
  11050. + /* We are in loss-state, and something got acked, retransmit the whole
  11051. + * queue now!
  11052. + */
  11053. + if (inet_csk(meta_sk)->icsk_ca_state == TCP_CA_Loss &&
  11054. + after(data_ack, prior_snd_una)) {
  11055. + mptcp_xmit_retransmit_queue(meta_sk);
  11056. + inet_csk(meta_sk)->icsk_ca_state = TCP_CA_Open;
  11057. + }
  11058. +
  11059. + /* Simplified version of tcp_new_space, because the snd-buffer
  11060. + * is handled by all the subflows.
  11061. + */
  11062. + if (sock_flag(meta_sk, SOCK_QUEUE_SHRUNK)) {
  11063. + sock_reset_flag(meta_sk, SOCK_QUEUE_SHRUNK);
  11064. + if (meta_sk->sk_socket &&
  11065. + test_bit(SOCK_NOSPACE, &meta_sk->sk_socket->flags))
  11066. + meta_sk->sk_write_space(meta_sk);
  11067. + }
  11068. +
  11069. + if (meta_sk->sk_state != TCP_ESTABLISHED &&
  11070. + mptcp_rcv_state_process(meta_sk, sk, skb, data_seq, data_len))
  11071. + return;
  11072. +
  11073. +exit:
  11074. + mptcp_push_pending_frames(meta_sk);
  11075. +
  11076. + return;
  11077. +
  11078. +no_queue:
  11079. + if (tcp_send_head(meta_sk))
  11080. + tcp_ack_probe(meta_sk);
  11081. +
  11082. + mptcp_push_pending_frames(meta_sk);
  11083. +
  11084. + return;
  11085. +}
  11086. +
  11087. +void mptcp_clean_rtx_infinite(struct sk_buff *skb, struct sock *sk)
  11088. +{
  11089. + struct tcp_sock *tp = tcp_sk(sk), *meta_tp = tcp_sk(mptcp_meta_sk(sk));
  11090. +
  11091. + if (!tp->mpcb->infinite_mapping_snd)
  11092. + return;
  11093. +
  11094. + /* The difference between the two write_seq's represents the offset
  11095. + * between data-sequence and subflow-sequence space. As we are in
  11096. + * infinite-mapping mode, the two advance in lock-step.
  11097. + *
  11098. + * Thus, from this difference we can infer the meta-level snd_una.
  11099. + */
  11100. + tp->mptcp->rx_opt.data_ack = meta_tp->snd_nxt - tp->snd_nxt +
  11101. + tp->snd_una;
  11102. +
  11103. + mptcp_data_ack(sk, skb);
  11104. +}
  11105. +
  11106. +/**** static functions used by mptcp_parse_options */
  11107. +
  11108. +static inline int mptcp_rem_raddress(struct mptcp_cb *mpcb, u8 rem_id)
  11109. +{
  11110. + if (mptcp_v4_rem_raddress(mpcb, rem_id) < 0) {
  11111. +#if IS_ENABLED(CONFIG_IPV6)
  11112. + if (mptcp_v6_rem_raddress(mpcb, rem_id) < 0)
  11113. + return -1;
  11114. +#else
  11115. + return -1;
  11116. +#endif /* CONFIG_IPV6 */
  11117. + }
  11118. + return 0;
  11119. +}
  11120. +
  11121. +static void mptcp_send_reset_rem_id(const struct mptcp_cb *mpcb, u8 rem_id)
  11122. +{
  11123. + struct sock *sk_it, *tmpsk;
  11124. +
  11125. + mptcp_for_each_sk_safe(mpcb, sk_it, tmpsk) {
  11126. + if (tcp_sk(sk_it)->mptcp->rem_id == rem_id) {
  11127. + mptcp_reinject_data(sk_it, 0);
  11128. + sk_it->sk_err = ECONNRESET;
  11129. + if (tcp_need_reset(sk_it->sk_state))
  11130. + tcp_send_active_reset(sk_it, GFP_ATOMIC);
  11131. + mptcp_sub_force_close(sk_it);
  11132. + }
  11133. + }
  11134. +}
  11135. +
  11136. +void mptcp_parse_options(const uint8_t *ptr, int opsize,
  11137. + struct tcp_options_received *opt_rx,
  11138. + struct mptcp_options_received *mopt,
  11139. + const struct sk_buff *skb)
  11140. +{
  11141. + struct mptcp_option *mp_opt = (struct mptcp_option *)ptr;
  11142. +
  11143. + /* If the socket is mp-capable we would have a mopt - otherwise there is nothing to do. */
  11144. + if (!mopt)
  11145. + return;
  11146. +
  11147. + switch (mp_opt->sub) {
  11148. + case MPTCP_SUB_CAPABLE:
  11149. + {
  11150. + struct mp_capable *mpcapable = (struct mp_capable *)ptr;
  11151. +
  11152. + if (opsize != MPTCP_SUB_LEN_CAPABLE_SYN &&
  11153. + opsize != MPTCP_SUB_LEN_CAPABLE_ACK) {
  11154. + mptcp_debug("%s: mp_capable: bad option size %d\n",
  11155. + __func__, opsize);
  11156. + break;
  11157. + }
  11158. +
  11159. + if (!sysctl_mptcp_enabled)
  11160. + break;
  11161. +
  11162. + /* We only support MPTCP version 0 */
  11163. + if (mpcapable->ver != 0)
  11164. + break;
  11165. +
  11166. + /* MPTCP-RFC 6824:
  11167. + * "If receiving a message with the 'B' flag set to 1, and this
  11168. + * is not understood, then this SYN MUST be silently ignored;
  11169. + */
  11170. + if (mpcapable->b) {
  11171. + mopt->drop_me = 1;
  11172. + break;
  11173. + }
  11174. +
  11175. + /* MPTCP-RFC 6824:
  11176. + * "An implementation that only supports this method MUST set
  11177. + * bit "H" to 1, and bits "C" through "G" to 0."
  11178. + */
  11179. + if (!mpcapable->h)
  11180. + break;
  11181. +
  11182. + mopt->saw_mpc = 1;
  11183. + mopt->dss_csum = sysctl_mptcp_checksum || mpcapable->a;
  11184. +
  11185. + if (opsize >= MPTCP_SUB_LEN_CAPABLE_SYN)
  11186. + mopt->mptcp_key = mpcapable->sender_key;
  11187. +
  11188. + break;
  11189. + }
  11190. + case MPTCP_SUB_JOIN:
  11191. + {
  11192. + struct mp_join *mpjoin = (struct mp_join *)ptr;
  11193. +
  11194. + if (opsize != MPTCP_SUB_LEN_JOIN_SYN &&
  11195. + opsize != MPTCP_SUB_LEN_JOIN_SYNACK &&
  11196. + opsize != MPTCP_SUB_LEN_JOIN_ACK) {
  11197. + mptcp_debug("%s: mp_join: bad option size %d\n",
  11198. + __func__, opsize);
  11199. + break;
  11200. + }
  11201. +
  11202. + /* saw_mpc must be set, because in tcp_check_req we assume that
  11203. + * it is set to support falling back to reg. TCP if a rexmitted
  11204. + * SYN has no MP_CAPABLE or MP_JOIN
  11205. + */
  11206. + switch (opsize) {
  11207. + case MPTCP_SUB_LEN_JOIN_SYN:
  11208. + mopt->is_mp_join = 1;
  11209. + mopt->saw_mpc = 1;
  11210. + mopt->low_prio = mpjoin->b;
  11211. + mopt->rem_id = mpjoin->addr_id;
  11212. + mopt->mptcp_rem_token = mpjoin->u.syn.token;
  11213. + mopt->mptcp_recv_nonce = mpjoin->u.syn.nonce;
  11214. + break;
  11215. + case MPTCP_SUB_LEN_JOIN_SYNACK:
  11216. + mopt->saw_mpc = 1;
  11217. + mopt->low_prio = mpjoin->b;
  11218. + mopt->rem_id = mpjoin->addr_id;
  11219. + mopt->mptcp_recv_tmac = mpjoin->u.synack.mac;
  11220. + mopt->mptcp_recv_nonce = mpjoin->u.synack.nonce;
  11221. + break;
  11222. + case MPTCP_SUB_LEN_JOIN_ACK:
  11223. + mopt->saw_mpc = 1;
  11224. + mopt->join_ack = 1;
  11225. + memcpy(mopt->mptcp_recv_mac, mpjoin->u.ack.mac, 20);
  11226. + break;
  11227. + }
  11228. + break;
  11229. + }
  11230. + case MPTCP_SUB_DSS:
  11231. + {
  11232. + struct mp_dss *mdss = (struct mp_dss *)ptr;
  11233. + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
  11234. +
  11235. + /* We check opsize for the csum and non-csum case. We do this,
  11236. + * because the draft says that the csum SHOULD be ignored if
  11237. + * it has not been negotiated in the MP_CAPABLE but still is
  11238. + * present in the data.
  11239. + *
  11240. + * It will get ignored later in mptcp_queue_skb.
  11241. + */
  11242. + if (opsize != mptcp_sub_len_dss(mdss, 0) &&
  11243. + opsize != mptcp_sub_len_dss(mdss, 1)) {
  11244. + mptcp_debug("%s: mp_dss: bad option size %d\n",
  11245. + __func__, opsize);
  11246. + break;
  11247. + }
  11248. +
  11249. + ptr += 4;
  11250. +
  11251. + if (mdss->A) {
  11252. + tcb->mptcp_flags |= MPTCPHDR_ACK;
  11253. +
  11254. + if (mdss->a) {
  11255. + mopt->data_ack = (u32) get_unaligned_be64(ptr);
  11256. + ptr += MPTCP_SUB_LEN_ACK_64;
  11257. + } else {
  11258. + mopt->data_ack = get_unaligned_be32(ptr);
  11259. + ptr += MPTCP_SUB_LEN_ACK;
  11260. + }
  11261. + }
  11262. +
  11263. + tcb->dss_off = (ptr - skb_transport_header(skb));
  11264. +
  11265. + if (mdss->M) {
  11266. + if (mdss->m) {
  11267. + u64 data_seq64 = get_unaligned_be64(ptr);
  11268. +
  11269. + tcb->mptcp_flags |= MPTCPHDR_SEQ64_SET;
  11270. + mopt->data_seq = (u32) data_seq64;
  11271. +
  11272. + ptr += 12; /* 64-bit dseq + subseq */
  11273. + } else {
  11274. + mopt->data_seq = get_unaligned_be32(ptr);
  11275. + ptr += 8; /* 32-bit dseq + subseq */
  11276. + }
  11277. + mopt->data_len = get_unaligned_be16(ptr);
  11278. +
  11279. + tcb->mptcp_flags |= MPTCPHDR_SEQ;
  11280. +
  11281. + /* Is a checksum present? */
  11282. + if (opsize == mptcp_sub_len_dss(mdss, 1))
  11283. + tcb->mptcp_flags |= MPTCPHDR_DSS_CSUM;
  11284. +
  11285. + /* DATA_FIN only possible with DSS-mapping */
  11286. + if (mdss->F)
  11287. + tcb->mptcp_flags |= MPTCPHDR_FIN;
  11288. + }
  11289. +
  11290. + break;
  11291. + }
  11292. + case MPTCP_SUB_ADD_ADDR:
  11293. + {
  11294. +#if IS_ENABLED(CONFIG_IPV6)
  11295. + struct mp_add_addr *mpadd = (struct mp_add_addr *)ptr;
  11296. +
  11297. + if ((mpadd->ipver == 4 && opsize != MPTCP_SUB_LEN_ADD_ADDR4 &&
  11298. + opsize != MPTCP_SUB_LEN_ADD_ADDR4 + 2) ||
  11299. + (mpadd->ipver == 6 && opsize != MPTCP_SUB_LEN_ADD_ADDR6 &&
  11300. + opsize != MPTCP_SUB_LEN_ADD_ADDR6 + 2)) {
  11301. +#else
  11302. + if (opsize != MPTCP_SUB_LEN_ADD_ADDR4 &&
  11303. + opsize != MPTCP_SUB_LEN_ADD_ADDR4 + 2) {
  11304. +#endif /* CONFIG_IPV6 */
  11305. + mptcp_debug("%s: mp_add_addr: bad option size %d\n",
  11306. + __func__, opsize);
  11307. + break;
  11308. + }
  11309. +
  11310. + /* If we got more than one ADD_ADDR, we have to re-parse the full option list later. */
  11311. + if (mopt->saw_add_addr) {
  11312. + mopt->more_add_addr = 1;
  11313. + break;
  11314. + }
  11315. + mopt->saw_add_addr = 1;
  11316. + mopt->add_addr_ptr = ptr;
  11317. + break;
  11318. + }
  11319. + case MPTCP_SUB_REMOVE_ADDR:
  11320. + if ((opsize - MPTCP_SUB_LEN_REMOVE_ADDR) < 0) {
  11321. + mptcp_debug("%s: mp_remove_addr: bad option size %d\n",
  11322. + __func__, opsize);
  11323. + break;
  11324. + }
  11325. +
  11326. + if (mopt->saw_rem_addr) {
  11327. + mopt->more_rem_addr = 1;
  11328. + break;
  11329. + }
  11330. + mopt->saw_rem_addr = 1;
  11331. + mopt->rem_addr_ptr = ptr;
  11332. + break;
  11333. + case MPTCP_SUB_PRIO:
  11334. + {
  11335. + struct mp_prio *mpprio = (struct mp_prio *)ptr;
  11336. +
  11337. + if (opsize != MPTCP_SUB_LEN_PRIO &&
  11338. + opsize != MPTCP_SUB_LEN_PRIO_ADDR) {
  11339. + mptcp_debug("%s: mp_prio: bad option size %d\n",
  11340. + __func__, opsize);
  11341. + break;
  11342. + }
  11343. +
  11344. + mopt->saw_low_prio = 1;
  11345. + mopt->low_prio = mpprio->b;
  11346. +
  11347. + if (opsize == MPTCP_SUB_LEN_PRIO_ADDR) {
  11348. + mopt->saw_low_prio = 2;
  11349. + mopt->prio_addr_id = mpprio->addr_id;
  11350. + }
  11351. + break;
  11352. + }
  11353. + case MPTCP_SUB_FAIL:
  11354. + if (opsize != MPTCP_SUB_LEN_FAIL) {
  11355. + mptcp_debug("%s: mp_fail: bad option size %d\n",
  11356. + __func__, opsize);
  11357. + break;
  11358. + }
  11359. + mopt->mp_fail = 1;
  11360. + break;
  11361. + case MPTCP_SUB_FCLOSE:
  11362. + if (opsize != MPTCP_SUB_LEN_FCLOSE) {
  11363. + mptcp_debug("%s: mp_fclose: bad option size %d\n",
  11364. + __func__, opsize);
  11365. + break;
  11366. + }
  11367. +
  11368. + mopt->mp_fclose = 1;
  11369. + mopt->mptcp_key = ((struct mp_fclose *)ptr)->key;
  11370. +
  11371. + break;
  11372. + default:
  11373. + mptcp_debug("%s: Received unknown subtype: %d\n",
  11374. + __func__, mp_opt->sub);
  11375. + break;
  11376. + }
  11377. +}
  11378. +
  11379. +int mptcp_check_rtt(const struct tcp_sock *tp, int time)
  11380. +{
  11381. + struct mptcp_cb *mpcb = tp->mpcb;
  11382. + struct sock *sk;
  11383. + u32 rtt_max = 0;
  11384. +
  11385. + /* In MPTCP, we take the max delay across all flows,
  11386. + * in order to take into account meta-reordering buffers.
  11387. + */
  11388. + mptcp_for_each_sk(mpcb, sk) {
  11389. + if (!mptcp_sk_can_recv(sk))
  11390. + continue;
  11391. +
  11392. + if (rtt_max < tcp_sk(sk)->rcv_rtt_est.rtt)
  11393. + rtt_max = tcp_sk(sk)->rcv_rtt_est.rtt;
  11394. + }
  11395. + if (time < (rtt_max >> 3) || !rtt_max)
  11396. + return 1;
  11397. +
  11398. + return 0;
  11399. +}
  11400. +
  11401. +static void mptcp_handle_add_addr(const unsigned char *ptr, struct sock *sk)
  11402. +{
  11403. + struct mp_add_addr *mpadd = (struct mp_add_addr *)ptr;
  11404. +
  11405. + if (mpadd->ipver == 4) {
  11406. + __be16 port = 0;
  11407. + if (mpadd->len == MPTCP_SUB_LEN_ADD_ADDR4 + 2)
  11408. + port = mpadd->u.v4.port;
  11409. +
  11410. + mptcp_v4_add_raddress(tcp_sk(sk)->mpcb, &mpadd->u.v4.addr, port,
  11411. + mpadd->addr_id);
  11412. +#if IS_ENABLED(CONFIG_IPV6)
  11413. + } else if (mpadd->ipver == 6) {
  11414. + __be16 port = 0;
  11415. + if (mpadd->len == MPTCP_SUB_LEN_ADD_ADDR6 + 2)
  11416. + port = mpadd->u.v6.port;
  11417. +
  11418. + mptcp_v6_add_raddress(tcp_sk(sk)->mpcb, &mpadd->u.v6.addr, port,
  11419. + mpadd->addr_id);
  11420. +#endif /* CONFIG_IPV6 */
  11421. + }
  11422. +}
  11423. +
  11424. +static void mptcp_handle_rem_addr(const unsigned char *ptr, struct sock *sk)
  11425. +{
  11426. + struct mp_remove_addr *mprem = (struct mp_remove_addr *)ptr;
  11427. + int i;
  11428. + u8 rem_id;
  11429. +
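+ /* A single REMOVE_ADDR option may carry a list of address-ids: remove
+ * each address and reset the subflows that were using it.
+ */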
  11430. + for (i = 0; i <= mprem->len - MPTCP_SUB_LEN_REMOVE_ADDR; i++) {
  11431. + rem_id = (&mprem->addrs_id)[i];
  11432. + if (!mptcp_rem_raddress(tcp_sk(sk)->mpcb, rem_id))
  11433. + mptcp_send_reset_rem_id(tcp_sk(sk)->mpcb, rem_id);
  11434. + }
  11435. +}
  11436. +
  11437. +static void mptcp_parse_addropt(const struct sk_buff *skb, struct sock *sk)
  11438. +{
  11439. + struct tcphdr *th = tcp_hdr(skb);
  11440. + unsigned char *ptr;
  11441. + int length = (th->doff * 4) - sizeof(struct tcphdr);
  11442. +
  11443. + /* Jump through the options to check whether ADD_ADDR is there */
  11444. + ptr = (unsigned char *)(th + 1);
  11445. + while (length > 0) {
  11446. + int opcode = *ptr++;
  11447. + int opsize;
  11448. +
  11449. + switch (opcode) {
  11450. + case TCPOPT_EOL:
  11451. + return;
  11452. + case TCPOPT_NOP:
  11453. + length--;
  11454. + continue;
  11455. + default:
  11456. + opsize = *ptr++;
  11457. + if (opsize < 2)
  11458. + return;
  11459. + if (opsize > length)
  11460. + return; /* don't parse partial options */
  11461. + if (opcode == TCPOPT_MPTCP &&
  11462. + ((struct mptcp_option *)ptr)->sub == MPTCP_SUB_ADD_ADDR) {
  11463. +#if IS_ENABLED(CONFIG_IPV6)
  11464. + struct mp_add_addr *mpadd = (struct mp_add_addr *)ptr;
  11465. + if ((mpadd->ipver == 4 && opsize != MPTCP_SUB_LEN_ADD_ADDR4 &&
  11466. + opsize != MPTCP_SUB_LEN_ADD_ADDR4 + 2) ||
  11467. + (mpadd->ipver == 6 && opsize != MPTCP_SUB_LEN_ADD_ADDR6 &&
  11468. + opsize != MPTCP_SUB_LEN_ADD_ADDR6 + 2))
  11469. +#else
  11470. + if (opsize != MPTCP_SUB_LEN_ADD_ADDR4 &&
  11471. + opsize != MPTCP_SUB_LEN_ADD_ADDR4 + 2)
  11472. +#endif /* CONFIG_IPV6 */
  11473. + goto cont;
  11474. +
  11475. + mptcp_handle_add_addr(ptr, sk);
  11476. + }
  11477. + if (opcode == TCPOPT_MPTCP &&
  11478. + ((struct mptcp_option *)ptr)->sub == MPTCP_SUB_REMOVE_ADDR) {
  11479. + if ((opsize - MPTCP_SUB_LEN_REMOVE_ADDR) < 0)
  11480. + goto cont;
  11481. +
  11482. + mptcp_handle_rem_addr(ptr, sk);
  11483. + }
  11484. +cont:
  11485. + ptr += opsize - 2;
  11486. + length -= opsize;
  11487. + }
  11488. + }
  11489. + return;
  11490. +}
  11491. +
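+/* Handle a received MP_FAIL or MP_FASTCLOSE.
+ * MP_FAIL (without a RST) makes us fall back to an infinite mapping and
+ * restart the meta send-queue from snd_una on this subflow only.
+ * MP_FASTCLOSE with the correct key force-closes all subflows and resets
+ * the meta-socket.
+ */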
  11492. +static inline int mptcp_mp_fail_rcvd(struct sock *sk, const struct tcphdr *th)
  11493. +{
  11494. + struct mptcp_tcp_sock *mptcp = tcp_sk(sk)->mptcp;
  11495. + struct sock *meta_sk = mptcp_meta_sk(sk);
  11496. + struct mptcp_cb *mpcb = tcp_sk(sk)->mpcb;
  11497. +
  11498. + if (unlikely(mptcp->rx_opt.mp_fail)) {
  11499. + mptcp->rx_opt.mp_fail = 0;
  11500. +
  11501. + if (!th->rst && !mpcb->infinite_mapping_snd) {
  11502. + struct sock *sk_it;
  11503. +
  11504. + mpcb->send_infinite_mapping = 1;
  11505. + /* We resend everything that has not been acknowledged */
  11506. + meta_sk->sk_send_head = tcp_write_queue_head(meta_sk);
  11507. +
  11508. + /* We artificially restart the whole send-queue. Thus,
  11509. + * it is as if no packets are in flight
  11510. + */
  11511. + tcp_sk(meta_sk)->packets_out = 0;
  11512. +
  11513. + /* If the snd_nxt already wrapped around, we have to
  11514. + * undo the wrapping, as we are restarting from snd_una
  11515. + * onwards.
  11516. + */
  11517. + if (tcp_sk(meta_sk)->snd_nxt < tcp_sk(meta_sk)->snd_una) {
  11518. + mpcb->snd_high_order[mpcb->snd_hiseq_index] -= 2;
  11519. + mpcb->snd_hiseq_index = mpcb->snd_hiseq_index ? 0 : 1;
  11520. + }
  11521. + tcp_sk(meta_sk)->snd_nxt = tcp_sk(meta_sk)->snd_una;
  11522. +
  11523. + /* Trigger a send on the meta-level. */
  11524. + mptcp_push_pending_frames(meta_sk);
  11525. +
  11526. + mptcp_for_each_sk(mpcb, sk_it) {
  11527. + if (sk != sk_it)
  11528. + mptcp_sub_force_close(sk_it);
  11529. + }
  11530. + }
  11531. +
  11532. + return 0;
  11533. + }
  11534. +
  11535. + if (unlikely(mptcp->rx_opt.mp_fclose)) {
  11536. + struct sock *sk_it, *tmpsk;
  11537. +
  11538. + mptcp->rx_opt.mp_fclose = 0;
  11539. + if (mptcp->rx_opt.mptcp_key != mpcb->mptcp_loc_key)
  11540. + return 0;
  11541. +
  11542. + if (tcp_need_reset(sk->sk_state))
  11543. + tcp_send_active_reset(sk, GFP_ATOMIC);
  11544. +
  11545. + mptcp_for_each_sk_safe(mpcb, sk_it, tmpsk)
  11546. + mptcp_sub_force_close(sk_it);
  11547. +
  11548. + tcp_reset(meta_sk);
  11549. +
  11550. + return 1;
  11551. + }
  11552. +
  11553. + return 0;
  11554. +}
  11555. +
  11556. +static inline void mptcp_path_array_check(struct sock *meta_sk)
  11557. +{
  11558. + struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
  11559. +
  11560. + if (unlikely(mpcb->list_rcvd)) {
  11561. + mpcb->list_rcvd = 0;
  11562. + if (mpcb->pm_ops->new_remote_address)
  11563. + mpcb->pm_ops->new_remote_address(meta_sk);
  11564. + }
  11565. +}
  11566. +
  11567. +int mptcp_handle_options(struct sock *sk, const struct tcphdr *th, struct sk_buff *skb)
  11568. +{
  11569. + struct tcp_sock *tp = tcp_sk(sk);
  11570. + struct mptcp_options_received *mopt = &tp->mptcp->rx_opt;
  11571. +
  11572. + if (tp->mpcb->infinite_mapping_rcv || tp->mpcb->infinite_mapping_snd)
  11573. + return 0;
  11574. +
  11575. + if (mptcp_mp_fail_rcvd(sk, th))
  11576. + return 1;
  11577. +
  11578. + /* RFC 6824, Section 3.3:
  11579. + * If a checksum is not present when its use has been negotiated, the
  11580. + * receiver MUST close the subflow with a RST as it is considered broken.
  11581. + */
  11582. + if (mptcp_is_data_seq(skb) && tp->mpcb->dss_csum &&
  11583. + !(TCP_SKB_CB(skb)->mptcp_flags & MPTCPHDR_DSS_CSUM)) {
  11584. + if (tcp_need_reset(sk->sk_state))
  11585. + tcp_send_active_reset(sk, GFP_ATOMIC);
  11586. +
  11587. + mptcp_sub_force_close(sk);
  11588. + return 1;
  11589. + }
  11590. +
  11591. + /* We have to acknowledge retransmissions of the third
  11592. + * ACK (the final ACK of the MP_JOIN handshake).
  11593. + */
  11594. + if (mopt->join_ack) {
  11595. + tcp_send_delayed_ack(sk);
  11596. + mopt->join_ack = 0;
  11597. + }
  11598. +
  11599. + if (mopt->saw_add_addr || mopt->saw_rem_addr) {
  11600. + if (mopt->more_add_addr || mopt->more_rem_addr) {
  11601. + mptcp_parse_addropt(skb, sk);
  11602. + } else {
  11603. + if (mopt->saw_add_addr)
  11604. + mptcp_handle_add_addr(mopt->add_addr_ptr, sk);
  11605. + if (mopt->saw_rem_addr)
  11606. + mptcp_handle_rem_addr(mopt->rem_addr_ptr, sk);
  11607. + }
  11608. +
  11609. + mopt->more_add_addr = 0;
  11610. + mopt->saw_add_addr = 0;
  11611. + mopt->more_rem_addr = 0;
  11612. + mopt->saw_rem_addr = 0;
  11613. + }
  11614. + if (mopt->saw_low_prio) {
  11615. + if (mopt->saw_low_prio == 1) {
  11616. + tp->mptcp->rcv_low_prio = mopt->low_prio;
  11617. + } else {
  11618. + struct sock *sk_it;
  11619. + mptcp_for_each_sk(tp->mpcb, sk_it) {
  11620. + struct mptcp_tcp_sock *mptcp = tcp_sk(sk_it)->mptcp;
  11621. + if (mptcp->rem_id == mopt->prio_addr_id)
  11622. + mptcp->rcv_low_prio = mopt->low_prio;
  11623. + }
  11624. + }
  11625. + mopt->saw_low_prio = 0;
  11626. + }
  11627. +
  11628. + mptcp_data_ack(sk, skb);
  11629. +
  11630. + mptcp_path_array_check(mptcp_meta_sk(sk));
  11631. + /* Socket may have been mp_killed by a REMOVE_ADDR */
  11632. + if (tp->mp_killed)
  11633. + return 1;
  11634. +
  11635. + return 0;
  11636. +}
  11637. +
  11638. +/* The skptr is needed, because if we become MPTCP-capable, we have to switch
  11639. + * from meta-socket to master-socket.
  11640. + *
  11641. + * @return: 1 - we want to reset this connection
  11642. + * 2 - we want to discard the received syn/ack
  11643. + * 0 - everything is fine - continue
  11644. + */
  11645. +int mptcp_rcv_synsent_state_process(struct sock *sk, struct sock **skptr,
  11646. + struct sk_buff *skb,
  11647. + struct mptcp_options_received *mopt)
  11648. +{
  11649. + struct tcp_sock *tp = tcp_sk(sk);
  11650. +
  11651. + if (tp->mpc) {
  11652. + u8 hash_mac_check[20];
  11653. + struct mptcp_cb *mpcb = tp->mpcb;
  11654. +
  11655. + mptcp_hmac_sha1((u8 *)&mpcb->mptcp_rem_key,
  11656. + (u8 *)&mpcb->mptcp_loc_key,
  11657. + (u8 *)&tp->mptcp->rx_opt.mptcp_recv_nonce,
  11658. + (u8 *)&tp->mptcp->mptcp_loc_nonce,
  11659. + (u32 *)hash_mac_check);
  11660. + if (memcmp(hash_mac_check,
  11661. + (char *)&tp->mptcp->rx_opt.mptcp_recv_tmac, 8)) {
  11662. + mptcp_sub_force_close(sk);
  11663. + return 1;
  11664. + }
  11665. +
  11666. + /* Set this flag in order to postpone data sending
  11667. + * until the 4th ack arrives.
  11668. + */
  11669. + tp->mptcp->pre_established = 1;
  11670. + tp->mptcp->rcv_low_prio = tp->mptcp->rx_opt.low_prio;
  11671. +
  11672. + mptcp_hmac_sha1((u8 *)&mpcb->mptcp_loc_key,
  11673. + (u8 *)&mpcb->mptcp_rem_key,
  11674. + (u8 *)&tp->mptcp->mptcp_loc_nonce,
  11675. + (u8 *)&tp->mptcp->rx_opt.mptcp_recv_nonce,
  11676. + (u32 *)&tp->mptcp->sender_mac[0]);
  11677. +
  11678. + } else if (mopt->saw_mpc) {
  11679. + if (mptcp_create_master_sk(sk, mopt->mptcp_key,
  11680. + ntohs(tcp_hdr(skb)->window)))
  11681. + return 2;
  11682. +
  11683. + sk = tcp_sk(sk)->mpcb->master_sk;
  11684. + *skptr = sk;
  11685. + tp = tcp_sk(sk);
  11686. +
  11687. + /* snd_nxt - 1, because it has been incremented
  11688. + * by tcp_connect for the SYN
  11689. + */
  11690. + tp->mptcp->snt_isn = tp->snd_nxt - 1;
  11691. + tp->mpcb->dss_csum = mopt->dss_csum;
  11692. + tp->mptcp->include_mpc = 1;
  11693. +
  11694. + sk_set_socket(sk, mptcp_meta_sk(sk)->sk_socket);
  11695. + sk->sk_wq = mptcp_meta_sk(sk)->sk_wq;
  11696. +
  11697. + mptcp_update_metasocket(sk, mptcp_meta_sk(sk));
  11698. +
  11699. + /* Drop the hold taken in mptcp_inherit_sk (refcount was initialized to 2) */
  11700. + sock_put(sk);
  11701. + } else {
  11702. + tp->request_mptcp = 0;
  11703. +
  11704. + if (tp->inside_tk_table)
  11705. + mptcp_hash_remove(tp);
  11706. + }
  11707. +
  11708. + if (tp->mpc)
  11709. + tp->mptcp->rcv_isn = TCP_SKB_CB(skb)->seq;
  11710. +
  11711. + return 0;
  11712. +}
  11713. +
  11714. +bool mptcp_should_expand_sndbuf(const struct sock *sk)
  11715. +{
  11716. + struct sock *sk_it;
  11717. + struct sock *meta_sk = mptcp_meta_sk(sk);
  11718. + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
  11719. + int cnt_backups = 0;
  11720. + int backup_available = 0;
  11721. +
  11722. + /* We circumvent this check in tcp_check_space, because we want to
  11723. + * always call sk_write_space. So, we reproduce the check here.
  11724. + */
  11725. + if (!meta_sk->sk_socket ||
  11726. + !test_bit(SOCK_NOSPACE, &meta_sk->sk_socket->flags))
  11727. + return false;
  11728. +
  11729. + /* If the user specified a specific send buffer setting, do
  11730. + * not modify it.
  11731. + */
  11732. + if (meta_sk->sk_userlocks & SOCK_SNDBUF_LOCK)
  11733. + return false;
  11734. +
  11735. + /* If we are under global TCP memory pressure, do not expand. */
  11736. + if (sk_under_memory_pressure(meta_sk))
  11737. + return false;
  11738. +
  11739. + /* If we are under soft global TCP memory pressure, do not expand. */
  11740. + if (sk_memory_allocated(meta_sk) >= sk_prot_mem_limits(meta_sk, 0))
  11741. + return false;
  11742. +
  11743. +
  11744. + /* For MPTCP we look for a subsocket that could send data.
  11745. + * If we found one, then we update the send-buffer.
  11746. + */
  11747. + mptcp_for_each_sk(meta_tp->mpcb, sk_it) {
  11748. + struct tcp_sock *tp_it = tcp_sk(sk_it);
  11749. +
  11750. + if (!mptcp_sk_can_send(sk_it))
  11751. + continue;
  11752. +
  11753. + /* Backup-flows have to be counted - if there is no other
  11754. + * subflow we take the backup-flow into account. */
  11755. + if (tp_it->mptcp->rcv_low_prio || tp_it->mptcp->low_prio) {
  11756. + cnt_backups++;
  11757. + }
  11758. +
  11759. + if (tp_it->packets_out < tp_it->snd_cwnd) {
  11760. + if (tp_it->mptcp->rcv_low_prio || tp_it->mptcp->low_prio) {
  11761. + backup_available = 1;
  11762. + continue;
  11763. + }
  11764. + return true;
  11765. + }
  11766. + }
  11767. +
  11768. + /* Backup-flow is available for sending - update send-buffer */
  11769. + if (meta_tp->mpcb->cnt_established == cnt_backups && backup_available)
  11770. + return true;
  11771. + return false;
  11772. +}
  11773. +
  11774. +void mptcp_init_buffer_space(struct sock *sk)
  11775. +{
  11776. + struct tcp_sock *tp = tcp_sk(sk);
  11777. + struct sock *meta_sk = mptcp_meta_sk(sk);
  11778. + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
  11779. + int space;
  11780. +
  11781. + tcp_init_buffer_space(sk);
  11782. +
  11783. + if (is_master_tp(tp)) {
  11784. + /* If there is only one subflow, we just use regular TCP
  11785. + * autotuning. User-locks are handled already by
  11786. + * tcp_init_buffer_space
  11787. + */
  11788. + meta_tp->window_clamp = tp->window_clamp;
  11789. + meta_tp->rcv_ssthresh = tp->rcv_ssthresh;
  11790. + meta_sk->sk_rcvbuf = sk->sk_rcvbuf;
  11791. + meta_sk->sk_sndbuf = sk->sk_sndbuf;
  11792. +
  11793. + return;
  11794. + }
  11795. +
  11796. + if (meta_sk->sk_userlocks & SOCK_RCVBUF_LOCK)
  11797. + goto snd_buf;
  11798. +
  11799. + /* Adding a new subflow to the rcv-buffer space. We make a simple
  11800. + * addition, to give some space to allow traffic on the new subflow.
  11801. + * Autotuning will increase it further later on.
  11802. + */
  11803. + space = min(meta_sk->sk_rcvbuf + sk->sk_rcvbuf, sysctl_tcp_rmem[2]);
  11804. + if (space > meta_sk->sk_rcvbuf) {
  11805. + meta_tp->window_clamp += tp->window_clamp;
  11806. + meta_tp->rcv_ssthresh += tp->rcv_ssthresh;
  11807. + meta_sk->sk_rcvbuf = space;
  11808. + }
  11809. +
  11810. +snd_buf:
  11811. + if (meta_sk->sk_userlocks & SOCK_SNDBUF_LOCK)
  11812. + return;
  11813. +
  11814. + /* Adding a new subflow to the send-buffer space. We make a simple
  11815. + * addition, to give some space to allow traffic on the new subflow.
  11816. + * Autotuning will increase it further later on.
  11817. + */
  11818. + space = min(meta_sk->sk_sndbuf + sk->sk_sndbuf, sysctl_tcp_wmem[2]);
  11819. + if (space > meta_sk->sk_sndbuf) {
  11820. + meta_sk->sk_sndbuf = space;
  11821. + meta_sk->sk_write_space(meta_sk);
  11822. + }
  11823. +}
  11824. +
  11825. +void mptcp_tcp_set_rto(struct sock *sk)
  11826. +{
  11827. + tcp_set_rto(sk);
  11828. + mptcp_set_rto(sk);
  11829. +}
  11830. diff -Nur linux-3.14.45.orig/net/mptcp/mptcp_ipv4.c linux-3.14.45/net/mptcp/mptcp_ipv4.c
  11831. --- linux-3.14.45.orig/net/mptcp/mptcp_ipv4.c 1970-01-01 01:00:00.000000000 +0100
  11832. +++ linux-3.14.45/net/mptcp/mptcp_ipv4.c 2015-06-24 14:15:48.895862487 +0200
  11833. @@ -0,0 +1,603 @@
  11834. +/*
  11835. + * MPTCP implementation - IPv4-specific functions
  11836. + *
  11837. + * Initial Design & Implementation:
  11838. + * Sébastien Barré <sebastien.barre@uclouvain.be>
  11839. + *
  11840. + * Current Maintainer:
  11841. + * Christoph Paasch <christoph.paasch@uclouvain.be>
  11842. + *
  11843. + * Additional authors:
  11844. + * Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
  11845. + * Gregory Detal <gregory.detal@uclouvain.be>
  11846. + * Fabien Duchêne <fabien.duchene@uclouvain.be>
  11847. + * Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
  11848. + * Lavkesh Lahngir <lavkesh51@gmail.com>
  11849. + * Andreas Ripke <ripke@neclab.eu>
  11850. + * Vlad Dogaru <vlad.dogaru@intel.com>
  11851. + * Octavian Purdila <octavian.purdila@intel.com>
  11852. + * John Ronan <jronan@tssg.org>
  11853. + * Catalin Nicutar <catalin.nicutar@gmail.com>
  11854. + * Brandon Heller <brandonh@stanford.edu>
  11855. + *
  11856. + *
  11857. + * This program is free software; you can redistribute it and/or
  11858. + * modify it under the terms of the GNU General Public License
  11859. + * as published by the Free Software Foundation; either version
  11860. + * 2 of the License, or (at your option) any later version.
  11861. + */
  11862. +
  11863. +#include <linux/export.h>
  11864. +#include <linux/ip.h>
  11865. +#include <linux/list.h>
  11866. +#include <linux/skbuff.h>
  11867. +#include <linux/spinlock.h>
  11868. +#include <linux/tcp.h>
  11869. +
  11870. +#include <net/inet_common.h>
  11871. +#include <net/inet_connection_sock.h>
  11872. +#include <net/mptcp.h>
  11873. +#include <net/mptcp_v4.h>
  11874. +#include <net/request_sock.h>
  11875. +#include <net/tcp.h>
  11876. +
  11877. +u32 mptcp_v4_get_nonce(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
  11878. + u32 seq)
  11879. +{
  11880. + u32 hash[MD5_DIGEST_WORDS];
  11881. +
  11882. + hash[0] = (__force u32)saddr;
  11883. + hash[1] = (__force u32)daddr;
  11884. + hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
  11885. + hash[3] = seq;
  11886. +
  11887. + md5_transform(hash, mptcp_secret);
  11888. +
  11889. + return hash[0];
  11890. +}
  11891. +
  11892. +u64 mptcp_v4_get_key(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport)
  11893. +{
  11894. + u32 hash[MD5_DIGEST_WORDS];
  11895. +
  11896. + hash[0] = (__force u32)saddr;
  11897. + hash[1] = (__force u32)daddr;
  11898. + hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
  11899. + hash[3] = mptcp_key_seed++;
  11900. +
  11901. + md5_transform(hash, mptcp_secret);
  11902. +
  11903. + return *((u64 *)hash);
  11904. +}
  11905. +
  11906. +
  11907. +static void mptcp_v4_reqsk_destructor(struct request_sock *req)
  11908. +{
  11909. + mptcp_reqsk_destructor(req);
  11910. +
  11911. + tcp_v4_reqsk_destructor(req);
  11912. +}
  11913. +
  11914. +/* Similar to tcp_request_sock_ops */
  11915. +struct request_sock_ops mptcp_request_sock_ops __read_mostly = {
  11916. + .family = PF_INET,
  11917. + .obj_size = sizeof(struct mptcp_request_sock),
  11918. + .rtx_syn_ack = tcp_v4_rtx_synack,
  11919. + .send_ack = tcp_v4_reqsk_send_ack,
  11920. + .destructor = mptcp_v4_reqsk_destructor,
  11921. + .send_reset = tcp_v4_send_reset,
  11922. + .syn_ack_timeout = tcp_syn_ack_timeout,
  11923. +};
  11924. +
  11925. +static void mptcp_v4_reqsk_queue_hash_add(struct sock *meta_sk,
  11926. + struct request_sock *req,
  11927. + unsigned long timeout)
  11928. +{
  11929. + const u32 h1 = inet_synq_hash(inet_rsk(req)->ir_rmt_addr,
  11930. + inet_rsk(req)->ir_rmt_port,
  11931. + 0, MPTCP_HASH_SIZE);
  11932. + /* We cannot call inet_csk_reqsk_queue_hash_add(), because we do not
  11933. + * want to reset the keepalive-timer (responsible for retransmitting
  11934. + * SYN/ACKs). We do not retransmit SYN/ACKs+MP_JOINs, because we cannot
  11935. + * overload the keepalive timer. Also, it's not a big deal, because the
  11936. + * third ACK of the MP_JOIN-handshake is sent in a reliable manner. So,
  11937. + * if the third ACK gets lost, the client will handle the retransmission
  11938. + * anyways. If our SYN/ACK gets lost, the client will retransmit the
  11939. + * SYN.
  11940. + */
  11941. + struct inet_connection_sock *meta_icsk = inet_csk(meta_sk);
  11942. + struct listen_sock *lopt = meta_icsk->icsk_accept_queue.listen_opt;
  11943. + const u32 h2 = inet_synq_hash(inet_rsk(req)->ir_rmt_addr,
  11944. + inet_rsk(req)->ir_rmt_port,
  11945. + lopt->hash_rnd, lopt->nr_table_entries);
  11946. +
  11947. + reqsk_queue_hash_req(&meta_icsk->icsk_accept_queue, h2, req, timeout);
  11948. + reqsk_queue_added(&meta_icsk->icsk_accept_queue);
  11949. +
  11950. + spin_lock(&mptcp_reqsk_hlock);
  11951. + list_add(&mptcp_rsk(req)->collide_tuple, &mptcp_reqsk_htb[h1]);
  11952. + spin_unlock(&mptcp_reqsk_hlock);
  11953. +}
  11954. +
  11955. +/* Similar to tcp_v4_conn_request */
  11956. +static void mptcp_v4_join_request(struct sock *meta_sk, struct sk_buff *skb)
  11957. +{
  11958. + struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
  11959. + struct tcp_options_received tmp_opt;
  11960. + struct mptcp_options_received mopt;
  11961. + struct request_sock *req;
  11962. + struct inet_request_sock *ireq;
  11963. + struct mptcp_request_sock *mtreq;
  11964. + struct dst_entry *dst = NULL;
  11965. + u8 mptcp_hash_mac[20];
  11966. + __be32 saddr = ip_hdr(skb)->saddr;
  11967. + __be32 daddr = ip_hdr(skb)->daddr;
  11968. + __u32 isn = TCP_SKB_CB(skb)->when;
  11969. + int want_cookie = 0;
  11970. + union inet_addr addr;
  11971. +
  11972. + tcp_clear_options(&tmp_opt);
  11973. + mptcp_init_mp_opt(&mopt);
  11974. + tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
  11975. + tmp_opt.user_mss = tcp_sk(meta_sk)->rx_opt.user_mss;
  11976. + tcp_parse_options(skb, &tmp_opt, &mopt, 0, NULL);
  11977. +
  11978. + req = inet_reqsk_alloc(&mptcp_request_sock_ops);
  11979. + if (!req)
  11980. + return;
  11981. +
  11982. +#ifdef CONFIG_TCP_MD5SIG
  11983. + tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
  11984. +#endif
  11985. +
  11986. + tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
  11987. + tcp_openreq_init(req, &tmp_opt, skb);
  11988. +
  11989. + ireq = inet_rsk(req);
  11990. + ireq->ir_loc_addr = daddr;
  11991. + ireq->ir_rmt_addr = saddr;
  11992. + ireq->no_srccheck = inet_sk(meta_sk)->transparent;
  11993. + ireq->opt = tcp_v4_save_options(skb);
  11994. +
  11995. + if (security_inet_conn_request(meta_sk, skb, req))
  11996. + goto drop_and_free;
  11997. +
  11998. + if (!want_cookie || tmp_opt.tstamp_ok)
  11999. + TCP_ECN_create_request(req, skb, sock_net(meta_sk));
  12000. +
  12001. + if (!isn) {
  12002. + struct flowi4 fl4;
  12003. +
  12004. + /* VJ's idea. We save last timestamp seen
  12005. + * from the destination in peer table, when entering
  12006. + * state TIME-WAIT, and check against it before
  12007. + * accepting new connection request.
  12008. + *
  12009. + * If "isn" is not zero, this request hit alive
  12010. + * timewait bucket, so that all the necessary checks
  12011. + * are made in the function processing timewait state.
  12012. + */
  12013. + if (tmp_opt.saw_tstamp &&
  12014. + tcp_death_row.sysctl_tw_recycle &&
  12015. + (dst = inet_csk_route_req(meta_sk, &fl4, req)) != NULL &&
  12016. + fl4.daddr == saddr) {
  12017. + if (!tcp_peer_is_proven(req, dst, true)) {
  12018. + NET_INC_STATS_BH(sock_net(meta_sk), LINUX_MIB_PAWSPASSIVEREJECTED);
  12019. + goto drop_and_release;
  12020. + }
  12021. + }
  12022. + /* Kill the following clause, if you dislike this way. */
  12023. + else if (!sysctl_tcp_syncookies &&
  12024. + (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(meta_sk) <
  12025. + (sysctl_max_syn_backlog >> 2)) &&
  12026. + !tcp_peer_is_proven(req, dst, false)) {
  12027. + /* Without syncookies last quarter of
  12028. + * backlog is filled with destinations,
  12029. + * proven to be alive.
  12030. + * It means that we continue to communicate
  12031. + * to destinations, already remembered
  12032. + * to the moment of synflood.
  12033. + */
  12034. + LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
  12035. + &saddr, ntohs(tcp_hdr(skb)->source));
  12036. + goto drop_and_release;
  12037. + }
  12038. +
  12039. + isn = tcp_v4_init_sequence(skb);
  12040. + }
  12041. + tcp_rsk(req)->snt_isn = isn;
  12042. + tcp_rsk(req)->snt_synack = tcp_time_stamp;
  12043. + tcp_rsk(req)->listener = NULL;
  12044. +
  12045. + mtreq = mptcp_rsk(req);
  12046. + mtreq->mpcb = mpcb;
  12047. + INIT_LIST_HEAD(&mtreq->collide_tuple);
  12048. + mtreq->mptcp_rem_nonce = mopt.mptcp_recv_nonce;
  12049. + mtreq->mptcp_rem_key = mpcb->mptcp_rem_key;
  12050. + mtreq->mptcp_loc_key = mpcb->mptcp_loc_key;
  12051. + mtreq->mptcp_loc_nonce = mptcp_v4_get_nonce(saddr, daddr,
  12052. + tcp_hdr(skb)->source,
  12053. + tcp_hdr(skb)->dest, isn);
  12054. + mptcp_hmac_sha1((u8 *)&mtreq->mptcp_loc_key,
  12055. + (u8 *)&mtreq->mptcp_rem_key,
  12056. + (u8 *)&mtreq->mptcp_loc_nonce,
  12057. + (u8 *)&mtreq->mptcp_rem_nonce, (u32 *)mptcp_hash_mac);
  12058. + mtreq->mptcp_hash_tmac = *(u64 *)mptcp_hash_mac;
  12059. +
  12060. + addr.ip = ireq->ir_loc_addr;
  12061. + mtreq->loc_id = mpcb->pm_ops->get_local_id(AF_INET, &addr, sock_net(meta_sk));
  12062. + if (mtreq->loc_id == -1) /* Address not part of the allowed ones */
  12063. + goto drop_and_release;
  12064. + mtreq->rem_id = mopt.rem_id;
  12065. + mtreq->low_prio = mopt.low_prio;
  12066. + tcp_rsk(req)->saw_mpc = 1;
  12067. +
  12068. + if (tcp_v4_send_synack(meta_sk, dst, req, skb_get_queue_mapping(skb)))
  12069. + goto drop_and_free;
  12070. +
  12071. + /* Adding to request queue in metasocket */
  12072. + mptcp_v4_reqsk_queue_hash_add(meta_sk, req, TCP_TIMEOUT_INIT);
  12073. +
  12074. + return;
  12075. +
  12076. +drop_and_release:
  12077. + dst_release(dst);
  12078. +drop_and_free:
  12079. + reqsk_free(req);
  12080. + return;
  12081. +}
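
One detail worth spelling out: mptcp_hash_mac above is a full 20-byte HMAC-SHA1 digest, but the MP_JOIN option of the SYN/ACK only has room for 64 bits, so mptcp_hash_tmac keeps just the leftmost 8 bytes of the digest. A small, hedged userspace sketch of that truncation step (the digest bytes are placeholders; mptcp_hmac_sha1 itself is provided elsewhere in this patch):

/* Illustrative sketch, not kernel code: truncating a 160-bit HMAC-SHA1
 * digest to the 64-bit value carried in the MP_JOIN option.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint64_t truncate_hmac(const uint8_t digest[20])
{
	uint64_t tmac;

	/* Same operation as mtreq->mptcp_hash_tmac = *(u64 *)mptcp_hash_mac */
	memcpy(&tmac, digest, sizeof(tmac));
	return tmac;
}

int main(void)
{
	uint8_t digest[20] = { 0xde, 0xad, 0xbe, 0xef };	/* placeholder digest */

	printf("truncated MAC: %016llx\n",
	       (unsigned long long)truncate_hmac(digest));
	return 0;
}
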
  12082. +
  12083. +int mptcp_v4_rem_raddress(struct mptcp_cb *mpcb, u8 id)
  12084. +{
  12085. + int i;
  12086. +
  12087. + for (i = 0; i < MPTCP_MAX_ADDR; i++) {
  12088. + if (!((1 << i) & mpcb->rem4_bits))
  12089. + continue;
  12090. +
  12091. + if (mpcb->remaddr4[i].rem4_id == id) {
  12092. + /* remove address from bitfield */
  12093. + mpcb->rem4_bits &= ~(1 << i);
  12094. +
  12095. + return 0;
  12096. + }
  12097. + }
  12098. +
  12099. + return -1;
  12100. +}
  12101. +
  12102. +/* Based on function tcp_v4_conn_request (tcp_ipv4.c)
  12103. + * Returns -1 if there is no space anymore to store an additional
  12104. + * address
  12105. + */
  12106. +int mptcp_v4_add_raddress(struct mptcp_cb *mpcb, const struct in_addr *addr,
  12107. + __be16 port, u8 id)
  12108. +{
  12109. + int i;
  12110. + struct mptcp_rem4 *rem4;
  12111. +
  12112. + mptcp_for_each_bit_set(mpcb->rem4_bits, i) {
  12113. + rem4 = &mpcb->remaddr4[i];
  12114. +
  12115. + /* Address is already in the list --- continue */
  12116. + if (rem4->rem4_id == id &&
  12117. + rem4->addr.s_addr == addr->s_addr && rem4->port == port)
  12118. + return 0;
  12119. +
  12120. + /* This may happen when the peer is behind a NAT. It is
  12121. + * trying to JOIN, thus sending the JOIN with a certain ID.
  12122. + * However, the src_addr of the IP packet has been changed. We
  12123. + * update the address in the list, because this is the address as
  12124. + * OUR BOX sees it.
  12125. + */
  12126. + if (rem4->rem4_id == id && rem4->addr.s_addr != addr->s_addr) {
  12127. + /* update the address */
  12128. + mptcp_debug("%s: updating old addr:%pI4 to addr %pI4 with id:%d\n",
  12129. + __func__, &rem4->addr.s_addr,
  12130. + &addr->s_addr, id);
  12131. + rem4->addr.s_addr = addr->s_addr;
  12132. + rem4->port = port;
  12133. + mpcb->list_rcvd = 1;
  12134. + return 0;
  12135. + }
  12136. + }
  12137. +
  12138. + i = mptcp_find_free_index(mpcb->rem4_bits);
  12139. + /* Do we already have the maximum number of local/remote addresses? */
  12140. + if (i < 0) {
  12141. + mptcp_debug("%s: At max num of remote addresses: %d --- not adding address: %pI4\n",
  12142. + __func__, MPTCP_MAX_ADDR, &addr->s_addr);
  12143. + return -1;
  12144. + }
  12145. +
  12146. + rem4 = &mpcb->remaddr4[i];
  12147. +
  12148. + /* Address is not known yet, store it */
  12149. + rem4->addr.s_addr = addr->s_addr;
  12150. + rem4->port = port;
  12151. + rem4->bitfield = 0;
  12152. + rem4->retry_bitfield = 0;
  12153. + rem4->rem4_id = id;
  12154. + mpcb->list_rcvd = 1;
  12155. + mpcb->rem4_bits |= (1 << i);
  12156. +
  12157. + return 0;
  12158. +}
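
The remote-address table above is a fixed MPTCP_MAX_ADDR-slot array whose occupancy is tracked by the rem4_bits bitmask: adding an address claims the first free bit, removing it simply clears that bit. A minimal userspace sketch of the same allocation pattern, with illustrative names in place of the mpcb fields:

/* Illustrative sketch, not kernel code: bitmask-indexed slot allocation as
 * used for mpcb->remaddr4[] / mpcb->rem4_bits.
 */
#include <stdio.h>

#define MAX_ADDR 8	/* stands in for MPTCP_MAX_ADDR */

static int find_free_index(unsigned int bits)
{
	int i;

	for (i = 0; i < MAX_ADDR; i++)
		if (!(bits & (1u << i)))
			return i;
	return -1;	/* table full, mirroring the -1 return above */
}

int main(void)
{
	unsigned int used = 0;
	int slot = find_free_index(used);

	used |= 1u << slot;	/* "add_raddress": claim the slot */
	printf("claimed slot %d, bits=0x%x\n", slot, used);

	used &= ~(1u << slot);	/* "rem_raddress": release it again */
	printf("after removal, bits=0x%x\n", used);
	return 0;
}
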
  12159. +
  12160. +/* Sets the bitfield of the remote-address field
  12161. + * local address is not set as it will disappear with the global address-list
  12162. + */
  12163. +void mptcp_v4_set_init_addr_bit(struct mptcp_cb *mpcb, __be32 daddr, int index)
  12164. +{
  12165. + int i;
  12166. +
  12167. + mptcp_for_each_bit_set(mpcb->rem4_bits, i) {
  12168. + if (mpcb->remaddr4[i].addr.s_addr == daddr) {
  12169. + mpcb->remaddr4[i].bitfield |= (1 << index);
  12170. + return;
  12171. + }
  12172. + }
  12173. +}
  12174. +
  12175. +/* We only process join requests here. (either the SYN or the final ACK) */
  12176. +int mptcp_v4_do_rcv(struct sock *meta_sk, struct sk_buff *skb)
  12177. +{
  12178. + struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
  12179. + struct sock *child, *rsk = NULL;
  12180. + int ret;
  12181. +
  12182. + if (!(TCP_SKB_CB(skb)->mptcp_flags & MPTCPHDR_JOIN)) {
  12183. + struct tcphdr *th = tcp_hdr(skb);
  12184. + const struct iphdr *iph = ip_hdr(skb);
  12185. + struct sock *sk;
  12186. +
  12187. + sk = inet_lookup_established(sock_net(meta_sk), &tcp_hashinfo,
  12188. + iph->saddr, th->source, iph->daddr,
  12189. + th->dest, inet_iif(skb));
  12190. +
  12191. + if (!sk) {
  12192. + kfree_skb(skb);
  12193. + return 0;
  12194. + }
  12195. + if (is_meta_sk(sk)) {
  12196. + WARN("%s Did not find a sub-sk - found the meta instead!\n", __func__);
  12197. + kfree_skb(skb);
  12198. + sock_put(sk);
  12199. + return 0;
  12200. + }
  12201. +
  12202. + if (sk->sk_state == TCP_TIME_WAIT) {
  12203. + inet_twsk_put(inet_twsk(sk));
  12204. + kfree_skb(skb);
  12205. + return 0;
  12206. + }
  12207. +
  12208. + ret = tcp_v4_do_rcv(sk, skb);
  12209. + sock_put(sk);
  12210. +
  12211. + return ret;
  12212. + }
  12213. + TCP_SKB_CB(skb)->mptcp_flags = 0;
  12214. +
  12215. + /* Has been removed from the tk-table. Thus, no new subflows.
  12216. + *
  12217. + * Check for close-state is necessary, because we may have been closed
  12218. + * without passing by mptcp_close().
  12219. + *
  12220. + * When falling back, no new subflows are allowed either.
  12221. + */
  12222. + if (meta_sk->sk_state == TCP_CLOSE || !tcp_sk(meta_sk)->inside_tk_table ||
  12223. + mpcb->infinite_mapping_rcv || mpcb->send_infinite_mapping)
  12224. + goto reset_and_discard;
  12225. +
  12226. + child = tcp_v4_hnd_req(meta_sk, skb);
  12227. +
  12228. + if (!child)
  12229. + goto discard;
  12230. +
  12231. + if (child != meta_sk) {
  12232. + sock_rps_save_rxhash(child, skb);
  12233. + /* We don't call tcp_child_process here, because we already
  12234. + * hold the meta-sk-lock and are sure that it is not owned
  12235. + * by the user.
  12236. + */
  12237. + ret = tcp_rcv_state_process(child, skb, tcp_hdr(skb), skb->len);
  12238. + bh_unlock_sock(child);
  12239. + sock_put(child);
  12240. + if (ret) {
  12241. + rsk = child;
  12242. + goto reset_and_discard;
  12243. + }
  12244. + } else {
  12245. + if (tcp_hdr(skb)->syn) {
  12246. + struct mp_join *join_opt = mptcp_find_join(skb);
  12247. + /* Currently we make two calls to mptcp_find_join(). This
  12248. + * can probably be optimized.
  12249. + */
  12250. + if (mptcp_v4_add_raddress(mpcb,
  12251. + (struct in_addr *)&ip_hdr(skb)->saddr,
  12252. + 0,
  12253. + join_opt->addr_id) < 0)
  12254. + goto reset_and_discard;
  12255. + mpcb->list_rcvd = 0;
  12256. +
  12257. + mptcp_v4_join_request(meta_sk, skb);
  12258. + goto discard;
  12259. + }
  12260. + goto reset_and_discard;
  12261. + }
  12262. + return 0;
  12263. +
  12264. +reset_and_discard:
  12265. + tcp_v4_send_reset(rsk, skb);
  12266. +discard:
  12267. + kfree_skb(skb);
  12268. + return 0;
  12269. +}
  12270. +
  12271. +/* After this, the ref count of the meta_sk associated with the request_sock
  12272. + * is incremented. Thus it is the responsibility of the caller
  12273. + * to call sock_put() when the reference is not needed anymore.
  12274. + */
  12275. +struct sock *mptcp_v4_search_req(const __be16 rport, const __be32 raddr,
  12276. + const __be32 laddr, const struct net *net)
  12277. +{
  12278. + struct mptcp_request_sock *mtreq;
  12279. + struct sock *meta_sk = NULL;
  12280. +
  12281. + spin_lock(&mptcp_reqsk_hlock);
  12282. + list_for_each_entry(mtreq,
  12283. + &mptcp_reqsk_htb[inet_synq_hash(raddr, rport, 0,
  12284. + MPTCP_HASH_SIZE)],
  12285. + collide_tuple) {
  12286. + struct inet_request_sock *ireq = inet_rsk(rev_mptcp_rsk(mtreq));
  12287. + meta_sk = mtreq->mpcb->meta_sk;
  12288. +
  12289. + if (ireq->ir_rmt_port == rport &&
  12290. + ireq->ir_rmt_addr == raddr &&
  12291. + ireq->ir_loc_addr == laddr &&
  12292. + rev_mptcp_rsk(mtreq)->rsk_ops->family == AF_INET &&
  12293. + net_eq(net, sock_net(meta_sk)))
  12294. + break;
  12295. + meta_sk = NULL;
  12296. + }
  12297. +
  12298. + if (meta_sk && unlikely(!atomic_inc_not_zero(&meta_sk->sk_refcnt)))
  12299. + meta_sk = NULL;
  12300. + spin_unlock(&mptcp_reqsk_hlock);
  12301. +
  12302. + return meta_sk;
  12303. +}
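
Because the lookup above takes its reference with atomic_inc_not_zero() under mptcp_reqsk_hlock, a socket that is concurrently being freed is never returned, and every successful caller owns exactly one reference. A hedged sketch of the expected calling pattern (not a hunk from this patch; example_lookup is a made-up name, and the declaration is assumed to come from the <net/mptcp_v4.h> header added by this patch):

/* Sketch of a caller of mptcp_v4_search_req(); the lookup returns the meta
 * socket with a reference already held, which the caller must drop.
 */
#include <net/mptcp_v4.h>
#include <net/sock.h>

static void example_lookup(__be16 rport, __be32 raddr, __be32 laddr,
			   struct net *net)
{
	struct sock *meta_sk = mptcp_v4_search_req(rport, raddr, laddr, net);

	if (!meta_sk)
		return;		/* no pending MP_JOIN request for this tuple */

	/* ... use meta_sk under the appropriate lock ... */

	sock_put(meta_sk);	/* release the reference taken by the lookup */
}
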
  12304. +
  12305. +/* Create a new IPv4 subflow.
  12306. + *
  12307. + * We are in user-context and the meta-sock-lock is held.
  12308. + */
  12309. +int mptcp_init4_subsockets(struct sock *meta_sk, const struct mptcp_loc4 *loc,
  12310. + struct mptcp_rem4 *rem)
  12311. +{
  12312. + struct tcp_sock *tp;
  12313. + struct sock *sk;
  12314. + struct sockaddr_in loc_in, rem_in;
  12315. + struct socket sock;
  12316. + int ulid_size = 0, ret;
  12317. +
  12318. + /** First, create and prepare the new socket */
  12319. +
  12320. + sock.type = meta_sk->sk_socket->type;
  12321. + sock.state = SS_UNCONNECTED;
  12322. + sock.wq = meta_sk->sk_socket->wq;
  12323. + sock.file = meta_sk->sk_socket->file;
  12324. + sock.ops = NULL;
  12325. +
  12326. + ret = inet_create(sock_net(meta_sk), &sock, IPPROTO_TCP, 1);
  12327. + if (unlikely(ret < 0)) {
  12328. + mptcp_debug("%s inet_create failed ret: %d\n", __func__, ret);
  12329. + return ret;
  12330. + }
  12331. +
  12332. + sk = sock.sk;
  12333. + tp = tcp_sk(sk);
  12334. +
  12335. + /* All subsockets need the MPTCP-lock-class */
  12336. + lockdep_set_class_and_name(&(sk)->sk_lock.slock, &meta_slock_key, "slock-AF_INET-MPTCP");
  12337. + lockdep_init_map(&(sk)->sk_lock.dep_map, "sk_lock-AF_INET-MPTCP", &meta_key, 0);
  12338. +
  12339. + if (mptcp_add_sock(meta_sk, sk, loc->loc4_id, rem->rem4_id, GFP_KERNEL))
  12340. + goto error;
  12341. +
  12342. + tp->mptcp->slave_sk = 1;
  12343. + tp->mptcp->low_prio = loc->low_prio;
  12344. +
  12345. + /* Initializing the timer for an MPTCP subflow */
  12346. + setup_timer(&tp->mptcp->mptcp_ack_timer, mptcp_ack_handler, (unsigned long)sk);
  12347. +
  12348. + /** Then, connect the socket to the peer */
  12349. +
  12350. + ulid_size = sizeof(struct sockaddr_in);
  12351. + loc_in.sin_family = AF_INET;
  12352. + rem_in.sin_family = AF_INET;
  12353. + loc_in.sin_port = 0;
  12354. + if (rem->port)
  12355. + rem_in.sin_port = rem->port;
  12356. + else
  12357. + rem_in.sin_port = inet_sk(meta_sk)->inet_dport;
  12358. + loc_in.sin_addr = loc->addr;
  12359. + rem_in.sin_addr = rem->addr;
  12360. +
  12361. + ret = sock.ops->bind(&sock, (struct sockaddr *)&loc_in, ulid_size);
  12362. + if (ret < 0) {
  12363. + mptcp_debug("%s: MPTCP subsocket bind() failed, error %d\n",
  12364. + __func__, ret);
  12365. + goto error;
  12366. + }
  12367. +
  12368. + mptcp_debug("%s: token %#x pi %d src_addr:%pI4:%d dst_addr:%pI4:%d\n",
  12369. + __func__, tcp_sk(meta_sk)->mpcb->mptcp_loc_token,
  12370. + tp->mptcp->path_index, &loc_in.sin_addr,
  12371. + ntohs(loc_in.sin_port), &rem_in.sin_addr,
  12372. + ntohs(rem_in.sin_port));
  12373. +
  12374. + ret = sock.ops->connect(&sock, (struct sockaddr *)&rem_in,
  12375. + ulid_size, O_NONBLOCK);
  12376. + if (ret < 0 && ret != -EINPROGRESS) {
  12377. + mptcp_debug("%s: MPTCP subsocket connect() failed, error %d\n",
  12378. + __func__, ret);
  12379. + goto error;
  12380. + }
  12381. +
  12382. + sk_set_socket(sk, meta_sk->sk_socket);
  12383. + sk->sk_wq = meta_sk->sk_wq;
  12384. +
  12385. + return 0;
  12386. +
  12387. +error:
  12388. + /* May happen if mptcp_add_sock fails first */
  12389. + if (!tp->mpc) {
  12390. + tcp_close(sk, 0);
  12391. + } else {
  12392. + local_bh_disable();
  12393. + mptcp_sub_force_close(sk);
  12394. + local_bh_enable();
  12395. + }
  12396. + return ret;
  12397. +}
  12398. +EXPORT_SYMBOL(mptcp_init4_subsockets);
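
A path manager is the expected caller of this helper; compare create_subflow_worker() in mptcp_ndiffports.c further down in this patch. The sketch below shows the minimal setup, assuming the connection already has a remote address stored in mpcb->remaddr4[0]; open_extra_subflow is a made-up name.

/* Sketch of a path-manager-style caller: reuse the primary source address
 * and open one additional subflow towards a known remote address. Must run
 * in user context with the meta-socket lock held.
 */
#include <net/tcp.h>
#include <net/mptcp.h>
#include <net/mptcp_v4.h>

static int open_extra_subflow(struct sock *meta_sk)
{
	struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
	struct mptcp_loc4 loc;

	loc.addr.s_addr = inet_sk(meta_sk)->inet_saddr;	/* primary source address */
	loc.loc4_id = 0;
	loc.low_prio = 0;

	return mptcp_init4_subsockets(meta_sk, &loc, &mpcb->remaddr4[0]);
}
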
  12399. +
  12400. +/* General initialization of IPv4 for MPTCP */
  12401. +int mptcp_pm_v4_init(void)
  12402. +{
  12403. + int ret = 0;
  12404. + struct request_sock_ops *ops = &mptcp_request_sock_ops;
  12405. +
  12406. + ops->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", "MPTCP");
  12407. + if (ops->slab_name == NULL) {
  12408. + ret = -ENOMEM;
  12409. + goto out;
  12410. + }
  12411. +
  12412. + ops->slab = kmem_cache_create(ops->slab_name, ops->obj_size, 0,
  12413. + SLAB_DESTROY_BY_RCU|SLAB_HWCACHE_ALIGN,
  12414. + NULL);
  12415. +
  12416. + if (ops->slab == NULL) {
  12417. + ret = -ENOMEM;
  12418. + goto err_reqsk_create;
  12419. + }
  12420. +
  12421. +out:
  12422. + return ret;
  12423. +
  12424. +err_reqsk_create:
  12425. + kfree(ops->slab_name);
  12426. + ops->slab_name = NULL;
  12427. + goto out;
  12428. +}
  12429. +
  12430. +void mptcp_pm_v4_undo(void)
  12431. +{
  12432. + kmem_cache_destroy(mptcp_request_sock_ops.slab);
  12433. + kfree(mptcp_request_sock_ops.slab_name);
  12434. +}
  12435. +
  12436. +
  12437. diff -Nur linux-3.14.45.orig/net/mptcp/mptcp_ipv6.c linux-3.14.45/net/mptcp/mptcp_ipv6.c
  12438. --- linux-3.14.45.orig/net/mptcp/mptcp_ipv6.c 1970-01-01 01:00:00.000000000 +0100
  12439. +++ linux-3.14.45/net/mptcp/mptcp_ipv6.c 2015-06-24 14:15:48.931862523 +0200
  12440. @@ -0,0 +1,822 @@
  12441. +/*
  12442. + * MPTCP implementation - IPv6-specific functions
  12443. + *
  12444. + * Initial Design & Implementation:
  12445. + * Sébastien Barré <sebastien.barre@uclouvain.be>
  12446. + *
  12447. + * Current Maintainer:
  12448. + * Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
  12449. + *
  12450. + * Additional authors:
  12451. + * Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
  12452. + * Gregory Detal <gregory.detal@uclouvain.be>
  12453. + * Fabien Duchêne <fabien.duchene@uclouvain.be>
  12454. + * Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
  12455. + * Lavkesh Lahngir <lavkesh51@gmail.com>
  12456. + * Andreas Ripke <ripke@neclab.eu>
  12457. + * Vlad Dogaru <vlad.dogaru@intel.com>
  12458. + * Octavian Purdila <octavian.purdila@intel.com>
  12459. + * John Ronan <jronan@tssg.org>
  12460. + * Catalin Nicutar <catalin.nicutar@gmail.com>
  12461. + * Brandon Heller <brandonh@stanford.edu>
  12462. + *
  12463. + *
  12464. + * This program is free software; you can redistribute it and/or
  12465. + * modify it under the terms of the GNU General Public License
  12466. + * as published by the Free Software Foundation; either version
  12467. + * 2 of the License, or (at your option) any later version.
  12468. + */
  12469. +
  12470. +#include <linux/export.h>
  12471. +#include <linux/in6.h>
  12472. +#include <linux/kernel.h>
  12473. +
  12474. +#include <net/addrconf.h>
  12475. +#include <net/flow.h>
  12476. +#include <net/inet6_connection_sock.h>
  12477. +#include <net/inet6_hashtables.h>
  12478. +#include <net/inet_common.h>
  12479. +#include <net/ipv6.h>
  12480. +#include <net/ip6_checksum.h>
  12481. +#include <net/ip6_route.h>
  12482. +#include <net/mptcp.h>
  12483. +#include <net/mptcp_v6.h>
  12484. +#include <net/tcp.h>
  12485. +#include <net/transp_v6.h>
  12486. +
  12487. +static int mptcp_v6v4_send_synack(struct sock *meta_sk, struct request_sock *req,
  12488. + u16 queue_mapping);
  12489. +
  12490. +__u32 mptcp_v6_get_nonce(const __be32 *saddr, const __be32 *daddr,
  12491. + __be16 sport, __be16 dport, u32 seq)
  12492. +{
  12493. + u32 secret[MD5_MESSAGE_BYTES / 4];
  12494. + u32 hash[MD5_DIGEST_WORDS];
  12495. + u32 i;
  12496. +
  12497. + memcpy(hash, saddr, 16);
  12498. + for (i = 0; i < 4; i++)
  12499. + secret[i] = mptcp_secret[i] + (__force u32)daddr[i];
  12500. + secret[4] = mptcp_secret[4] +
  12501. + (((__force u16)sport << 16) + (__force u16)dport);
  12502. + secret[5] = seq;
  12503. + for (i = 6; i < MD5_MESSAGE_BYTES / 4; i++)
  12504. + secret[i] = mptcp_secret[i];
  12505. +
  12506. + md5_transform(hash, secret);
  12507. +
  12508. + return hash[0];
  12509. +}
  12510. +
  12511. +u64 mptcp_v6_get_key(const __be32 *saddr, const __be32 *daddr,
  12512. + __be16 sport, __be16 dport)
  12513. +{
  12514. + u32 secret[MD5_MESSAGE_BYTES / 4];
  12515. + u32 hash[MD5_DIGEST_WORDS];
  12516. + u32 i;
  12517. +
  12518. + memcpy(hash, saddr, 16);
  12519. + for (i = 0; i < 4; i++)
  12520. + secret[i] = mptcp_secret[i] + (__force u32)daddr[i];
  12521. + secret[4] = mptcp_secret[4] +
  12522. + (((__force u16)sport << 16) + (__force u16)dport);
  12523. + secret[5] = mptcp_key_seed++;
  12524. + for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++)
  12525. + secret[i] = mptcp_secret[i];
  12526. +
  12527. + md5_transform(hash, secret);
  12528. +
  12529. + return *((u64 *)hash);
  12530. +}
  12531. +
  12532. +static void mptcp_v6_reqsk_destructor(struct request_sock *req)
  12533. +{
  12534. + mptcp_reqsk_destructor(req);
  12535. +
  12536. + tcp_v6_reqsk_destructor(req);
  12537. +}
  12538. +
  12539. +/* Similar to tcp_v6_rtx_synack */
  12540. +static int mptcp_v6_rtx_synack(struct sock *meta_sk, struct request_sock *req)
  12541. +{
  12542. + if (meta_sk->sk_family == AF_INET6)
  12543. + return tcp_v6_rtx_synack(meta_sk, req);
  12544. +
  12545. + TCP_INC_STATS_BH(sock_net(meta_sk), TCP_MIB_RETRANSSEGS);
  12546. + return mptcp_v6v4_send_synack(meta_sk, req, 0);
  12547. +}
  12548. +
  12549. +/* Similar to tcp6_request_sock_ops */
  12550. +struct request_sock_ops mptcp6_request_sock_ops __read_mostly = {
  12551. + .family = AF_INET6,
  12552. + .obj_size = sizeof(struct mptcp_request_sock),
  12553. + .rtx_syn_ack = mptcp_v6_rtx_synack,
  12554. + .send_ack = tcp_v6_reqsk_send_ack,
  12555. + .destructor = mptcp_v6_reqsk_destructor,
  12556. + .send_reset = tcp_v6_send_reset,
  12557. + .syn_ack_timeout = tcp_syn_ack_timeout,
  12558. +};
  12559. +
  12560. +static void mptcp_v6_reqsk_queue_hash_add(struct sock *meta_sk,
  12561. + struct request_sock *req,
  12562. + unsigned long timeout)
  12563. +{
  12564. + const u32 h1 = inet6_synq_hash(&inet_rsk(req)->ir_v6_rmt_addr,
  12565. + inet_rsk(req)->ir_rmt_port,
  12566. + 0, MPTCP_HASH_SIZE);
  12567. + /* We cannot call inet6_csk_reqsk_queue_hash_add(), because we do not
  12568. + * want to reset the keepalive-timer (responsible for retransmitting
  12569. + * SYN/ACKs). We do not retransmit SYN/ACKs+MP_JOINs, because we cannot
  12570. + * overload the keepalive timer. Also, it's not a big deal, because the
  12571. + * third ACK of the MP_JOIN-handshake is sent in a reliable manner. So,
  12572. + * if the third ACK gets lost, the client will handle the retransmission
  12573. + * anyways. If our SYN/ACK gets lost, the client will retransmit the
  12574. + * SYN.
  12575. + */
  12576. + struct inet_connection_sock *meta_icsk = inet_csk(meta_sk);
  12577. + struct listen_sock *lopt = meta_icsk->icsk_accept_queue.listen_opt;
  12578. + const u32 h2 = inet6_synq_hash(&inet_rsk(req)->ir_v6_rmt_addr,
  12579. + inet_rsk(req)->ir_rmt_port,
  12580. + lopt->hash_rnd, lopt->nr_table_entries);
  12581. +
  12582. + reqsk_queue_hash_req(&meta_icsk->icsk_accept_queue, h2, req, timeout);
  12583. + reqsk_queue_added(&meta_icsk->icsk_accept_queue);
  12584. +
  12585. + spin_lock(&mptcp_reqsk_hlock);
  12586. + list_add(&mptcp_rsk(req)->collide_tuple, &mptcp_reqsk_htb[h1]);
  12587. + spin_unlock(&mptcp_reqsk_hlock);
  12588. +}
  12589. +
  12590. +/* Similar to tcp_v6_send_synack
  12591. + *
  12592. + * The meta-socket is IPv4, but a new subsocket is IPv6
  12593. + */
  12594. +static int mptcp_v6v4_send_synack(struct sock *meta_sk, struct request_sock *req,
  12595. + u16 queue_mapping)
  12596. +{
  12597. + struct inet_request_sock *treq = inet_rsk(req);
  12598. + struct sk_buff *skb;
  12599. + struct flowi6 fl6;
  12600. + struct dst_entry *dst;
  12601. + int err = -ENOMEM;
  12602. +
  12603. + memset(&fl6, 0, sizeof(fl6));
  12604. + fl6.flowi6_proto = IPPROTO_TCP;
  12605. + fl6.daddr = treq->ir_v6_rmt_addr;
  12606. + fl6.saddr = treq->ir_v6_loc_addr;
  12607. + fl6.flowlabel = 0;
  12608. + fl6.flowi6_oif = treq->ir_iif;
  12609. + fl6.flowi6_mark = meta_sk->sk_mark;
  12610. + fl6.fl6_dport = inet_rsk(req)->ir_rmt_port;
  12611. + fl6.fl6_sport = htons(inet_rsk(req)->ir_num);
  12612. + security_req_classify_flow(req, flowi6_to_flowi(&fl6));
  12613. +
  12614. + dst = ip6_dst_lookup_flow(meta_sk, &fl6, NULL);
  12615. + if (IS_ERR(dst)) {
  12616. + err = PTR_ERR(dst);
  12617. + return err;
  12618. + }
  12619. + skb = tcp_make_synack(meta_sk, dst, req, NULL);
  12620. +
  12621. + if (skb) {
  12622. + __tcp_v6_send_check(skb, &treq->ir_v6_loc_addr,
  12623. + &treq->ir_v6_rmt_addr);
  12624. +
  12625. + fl6.daddr = treq->ir_v6_rmt_addr;
  12626. + skb_set_queue_mapping(skb, queue_mapping);
  12627. + err = ip6_xmit(meta_sk, skb, &fl6, NULL, 0);
  12628. + err = net_xmit_eval(err);
  12629. + }
  12630. +
  12631. + return err;
  12632. +}
  12633. +
  12634. +/* Similar to tcp_v6_syn_recv_sock
  12635. + *
  12636. + * The meta-socket is IPv4, but a new subsocket is IPv6
  12637. + */
  12638. +struct sock *mptcp_v6v4_syn_recv_sock(struct sock *meta_sk, struct sk_buff *skb,
  12639. + struct request_sock *req,
  12640. + struct dst_entry *dst)
  12641. +{
  12642. + struct inet_request_sock *treq;
  12643. + struct ipv6_pinfo *newnp;
  12644. + struct tcp6_sock *newtcp6sk;
  12645. + struct inet_sock *newinet;
  12646. + struct tcp_sock *newtp;
  12647. + struct sock *newsk;
  12648. +
  12649. + treq = inet_rsk(req);
  12650. +
  12651. + if (sk_acceptq_is_full(meta_sk))
  12652. + goto out_overflow;
  12653. +
  12654. + if (!dst) {
  12655. + /* This code is similar to inet6_csk_route_req, but as we
  12656. + * don't have a np-pointer in the meta, we have to do it
  12657. + * manually.
  12658. + */
  12659. + struct flowi6 fl6;
  12660. +
  12661. + memset(&fl6, 0, sizeof(fl6));
  12662. + fl6.flowi6_proto = IPPROTO_TCP;
  12663. + fl6.daddr = treq->ir_v6_rmt_addr;
  12664. + fl6.saddr = treq->ir_v6_loc_addr;
  12665. + fl6.flowi6_oif = treq->ir_iif;
  12666. + fl6.flowi6_mark = meta_sk->sk_mark;
  12667. + fl6.fl6_dport = inet_rsk(req)->ir_rmt_port;
  12668. + fl6.fl6_sport = htons(inet_rsk(req)->ir_num);
  12669. + security_req_classify_flow(req, flowi6_to_flowi(&fl6));
  12670. +
  12671. + dst = ip6_dst_lookup_flow(meta_sk, &fl6, NULL);
  12672. + if (IS_ERR(dst))
  12673. + goto out;
  12674. + }
  12675. +
  12676. + newsk = tcp_create_openreq_child(meta_sk, req, skb);
  12677. + if (newsk == NULL)
  12678. + goto out_nonewsk;
  12679. +
  12680. + /* Diff to tcp_v6_syn_recv_sock: Must do this prior to __ip6_dst_store,
  12681. + * as it tries to access the pinet6-pointer.
  12682. + */
  12683. + newtcp6sk = (struct tcp6_sock *)newsk;
  12684. + inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
  12685. +
  12686. + /*
  12687. + * No need to charge this sock to the relevant IPv6 refcnt debug socks
  12688. + * count here, tcp_create_openreq_child now does this for us, see the
  12689. + * comment in that function for the gory details. -acme
  12690. + */
  12691. +
  12692. + newsk->sk_gso_type = SKB_GSO_TCPV6;
  12693. + __ip6_dst_store(newsk, dst, NULL, NULL);
  12694. + inet6_sk_rx_dst_set(newsk, skb);
  12695. +
  12696. + newtp = tcp_sk(newsk);
  12697. + newinet = inet_sk(newsk);
  12698. + newnp = inet6_sk(newsk);
  12699. +
  12700. + newsk->sk_v6_daddr = treq->ir_v6_rmt_addr;
  12701. + newnp->saddr = treq->ir_v6_loc_addr;
  12702. + newsk->sk_v6_rcv_saddr = treq->ir_v6_loc_addr;
  12703. + newsk->sk_bound_dev_if = treq->ir_iif;
  12704. +
  12705. + /* Now IPv6 options...
  12706. +
  12707. + First: no IPv4 options.
  12708. + */
  12709. + newinet->inet_opt = NULL;
  12710. + newnp->ipv6_ac_list = NULL;
  12711. + newnp->ipv6_fl_list = NULL;
  12712. + newnp->rxopt.all = 0;
  12713. +
  12714. + /* Clone pktoptions received with SYN */
  12715. + newnp->pktoptions = NULL;
  12716. + if (treq->pktopts != NULL) {
  12717. + newnp->pktoptions = skb_clone(treq->pktopts,
  12718. + sk_gfp_atomic(meta_sk, GFP_ATOMIC));
  12719. + consume_skb(treq->pktopts);
  12720. + treq->pktopts = NULL;
  12721. + if (newnp->pktoptions)
  12722. + skb_set_owner_r(newnp->pktoptions, newsk);
  12723. + }
  12724. + newnp->opt = NULL;
  12725. + newnp->mcast_oif = inet6_iif(skb);
  12726. + newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
  12727. + newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
  12728. +
  12729. + /* Initialization copied from inet6_create - normally this should have
  12730. + * been handled by the memcpy as in tcp_v6_syn_recv_sock
  12731. + */
  12732. + newnp->hop_limit = -1;
  12733. + newnp->mc_loop = 1;
  12734. + newnp->pmtudisc = IPV6_PMTUDISC_WANT;
  12735. + (void)xchg(&newnp->rxpmtu, NULL);
  12736. +
  12737. + inet_csk(newsk)->icsk_ext_hdr_len = 0;
  12738. +
  12739. + tcp_mtup_init(newsk);
  12740. + tcp_sync_mss(newsk, dst_mtu(dst));
  12741. + newtp->advmss = dst_metric_advmss(dst);
  12742. + if (tcp_sk(meta_sk)->rx_opt.user_mss &&
  12743. + tcp_sk(meta_sk)->rx_opt.user_mss < newtp->advmss)
  12744. + newtp->advmss = tcp_sk(meta_sk)->rx_opt.user_mss;
  12745. +
  12746. + tcp_initialize_rcv_mss(newsk);
  12747. +
  12748. + newinet->inet_daddr = LOOPBACK4_IPV6;
  12749. + newinet->inet_saddr = LOOPBACK4_IPV6;
  12750. + newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
  12751. +
  12752. + if (__inet_inherit_port(meta_sk, newsk) < 0) {
  12753. + inet_csk_prepare_forced_close(newsk);
  12754. + tcp_done(newsk);
  12755. + goto out;
  12756. + }
  12757. + __inet6_hash(newsk, NULL);
  12758. +
  12759. + return newsk;
  12760. +
  12761. +out_overflow:
  12762. + NET_INC_STATS_BH(sock_net(meta_sk), LINUX_MIB_LISTENOVERFLOWS);
  12763. +out_nonewsk:
  12764. + dst_release(dst);
  12765. +out:
  12766. + NET_INC_STATS_BH(sock_net(meta_sk), LINUX_MIB_LISTENDROPS);
  12767. + return NULL;
  12768. +}
  12769. +
  12770. +/* Similar to tcp_v6_conn_request */
  12771. +static void mptcp_v6_join_request(struct sock *meta_sk, struct sk_buff *skb)
  12772. +{
  12773. + struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
  12774. + struct tcp_options_received tmp_opt;
  12775. + struct mptcp_options_received mopt;
  12776. + struct ipv6_pinfo *np = inet6_sk(meta_sk);
  12777. + struct request_sock *req;
  12778. + struct inet_request_sock *treq;
  12779. + struct mptcp_request_sock *mtreq;
  12780. + u8 mptcp_hash_mac[20];
  12781. + __u32 isn = TCP_SKB_CB(skb)->when;
  12782. + struct dst_entry *dst = NULL;
  12783. + struct flowi6 fl6;
  12784. + int want_cookie = 0;
  12785. + union inet_addr addr;
  12786. +
  12787. + tcp_clear_options(&tmp_opt);
  12788. + mptcp_init_mp_opt(&mopt);
  12789. + tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
  12790. + tmp_opt.user_mss = tcp_sk(meta_sk)->rx_opt.user_mss;
  12791. + tcp_parse_options(skb, &tmp_opt, &mopt, 0, NULL);
  12792. +
  12793. + req = inet6_reqsk_alloc(&mptcp6_request_sock_ops);
  12794. + if (!req)
  12795. + return;
  12796. +
  12797. +#ifdef CONFIG_TCP_MD5SIG
  12798. + tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops;
  12799. +#endif
  12800. +
  12801. + tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
  12802. + tcp_openreq_init(req, &tmp_opt, skb);
  12803. +
  12804. + treq = inet_rsk(req);
  12805. + treq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
  12806. + treq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
  12807. +
  12808. + if (!want_cookie || tmp_opt.tstamp_ok)
  12809. + TCP_ECN_create_request(req, skb, sock_net(meta_sk));
  12810. +
  12811. + treq->ir_iif = meta_sk->sk_bound_dev_if;
  12812. +
  12813. + /* So that link locals have meaning */
  12814. + if (!meta_sk->sk_bound_dev_if &&
  12815. + ipv6_addr_type(&treq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
  12816. + treq->ir_iif = inet6_iif(skb);
  12817. +
  12818. + if (!isn) {
  12819. + if (meta_sk->sk_family == AF_INET6 &&
  12820. + (ipv6_opt_accepted(meta_sk, skb) ||
  12821. + np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
  12822. + np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)) {
  12823. + atomic_inc(&skb->users);
  12824. + treq->pktopts = skb;
  12825. + }
  12826. +
  12827. + /* VJ's idea. We save last timestamp seen
  12828. + * from the destination in peer table, when entering
  12829. + * state TIME-WAIT, and check against it before
  12830. + * accepting new connection request.
  12831. + *
  12832. + * If "isn" is not zero, this request hit alive
  12833. + * timewait bucket, so that all the necessary checks
  12834. + * are made in the function processing timewait state.
  12835. + */
  12836. + if (tmp_opt.saw_tstamp &&
  12837. + tcp_death_row.sysctl_tw_recycle &&
  12838. + (dst = inet6_csk_route_req(meta_sk, &fl6, req)) != NULL) {
  12839. + if (!tcp_peer_is_proven(req, dst, true)) {
  12840. + NET_INC_STATS_BH(sock_net(meta_sk), LINUX_MIB_PAWSPASSIVEREJECTED);
  12841. + goto drop_and_release;
  12842. + }
  12843. + }
  12844. + /* Kill the following clause, if you dislike this way. */
  12845. + else if (!sysctl_tcp_syncookies &&
  12846. + (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(meta_sk) <
  12847. + (sysctl_max_syn_backlog >> 2)) &&
  12848. + !tcp_peer_is_proven(req, dst, false)) {
  12849. + /* Without syncookies last quarter of
  12850. + * backlog is filled with destinations,
  12851. + * proven to be alive.
  12852. + * It means that we continue to communicate
  12853. + * to destinations, already remembered
  12854. + * to the moment of synflood.
  12855. + */
  12856. + LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n",
  12857. + &treq->ir_v6_rmt_addr,
  12858. + ntohs(tcp_hdr(skb)->source));
  12859. + goto drop_and_release;
  12860. + }
  12861. +
  12862. + isn = tcp_v6_init_sequence(skb);
  12863. + }
  12864. +
  12865. + tcp_rsk(req)->snt_isn = isn;
  12866. + tcp_rsk(req)->snt_synack = tcp_time_stamp;
  12867. + tcp_rsk(req)->listener = NULL;
  12868. +
  12869. + mtreq = mptcp_rsk(req);
  12870. + mtreq->mpcb = mpcb;
  12871. + INIT_LIST_HEAD(&mtreq->collide_tuple);
  12872. + mtreq->mptcp_rem_nonce = mopt.mptcp_recv_nonce;
  12873. + mtreq->mptcp_rem_key = mpcb->mptcp_rem_key;
  12874. + mtreq->mptcp_loc_key = mpcb->mptcp_loc_key;
  12875. + mtreq->mptcp_loc_nonce = mptcp_v6_get_nonce(ipv6_hdr(skb)->daddr.s6_addr32,
  12876. + ipv6_hdr(skb)->saddr.s6_addr32,
  12877. + tcp_hdr(skb)->dest,
  12878. + tcp_hdr(skb)->source, isn);
  12879. + mptcp_hmac_sha1((u8 *)&mtreq->mptcp_loc_key,
  12880. + (u8 *)&mtreq->mptcp_rem_key,
  12881. + (u8 *)&mtreq->mptcp_loc_nonce,
  12882. + (u8 *)&mtreq->mptcp_rem_nonce, (u32 *)mptcp_hash_mac);
  12883. + mtreq->mptcp_hash_tmac = *(u64 *)mptcp_hash_mac;
  12884. +
  12885. + addr.in6 = treq->ir_v6_loc_addr;
  12886. + mtreq->loc_id = mpcb->pm_ops->get_local_id(AF_INET6, &addr, sock_net(meta_sk));
  12887. + if (mtreq->loc_id == -1) /* Address not part of the allowed ones */
  12888. + goto drop_and_release;
  12889. + mtreq->rem_id = mopt.rem_id;
  12890. + mtreq->low_prio = mopt.low_prio;
  12891. + tcp_rsk(req)->saw_mpc = 1;
  12892. +
  12893. + if (meta_sk->sk_family == AF_INET6) {
  12894. + if (tcp_v6_send_synack(meta_sk, dst, &fl6, req,
  12895. + skb_get_queue_mapping(skb)))
  12896. + goto drop_and_free;
  12897. + } else {
  12898. + if (mptcp_v6v4_send_synack(meta_sk, req, skb_get_queue_mapping(skb)))
  12899. + goto drop_and_free;
  12900. + }
  12901. +
  12902. + /* Adding to request queue in metasocket */
  12903. + mptcp_v6_reqsk_queue_hash_add(meta_sk, req, TCP_TIMEOUT_INIT);
  12904. +
  12905. + return;
  12906. +
  12907. +drop_and_release:
  12908. + dst_release(dst);
  12909. +drop_and_free:
  12910. + reqsk_free(req);
  12911. + return;
  12912. +}
  12913. +
  12914. +int mptcp_v6_rem_raddress(struct mptcp_cb *mpcb, u8 id)
  12915. +{
  12916. + int i;
  12917. +
  12918. + for (i = 0; i < MPTCP_MAX_ADDR; i++) {
  12919. + if (!((1 << i) & mpcb->rem6_bits))
  12920. + continue;
  12921. +
  12922. + if (mpcb->remaddr6[i].rem6_id == id) {
  12923. + /* remove address from bitfield */
  12924. + mpcb->rem6_bits &= ~(1 << i);
  12925. +
  12926. + return 0;
  12927. + }
  12928. + }
  12929. +
  12930. + return -1;
  12931. +}
  12932. +
  12933. +/* Returns -1 if there is no space anymore to store an additional
  12934. + * address
  12935. + */
  12936. +int mptcp_v6_add_raddress(struct mptcp_cb *mpcb, const struct in6_addr *addr,
  12937. + __be16 port, u8 id)
  12938. +{
  12939. + int i;
  12940. + struct mptcp_rem6 *rem6;
  12941. +
  12942. + mptcp_for_each_bit_set(mpcb->rem6_bits, i) {
  12943. + rem6 = &mpcb->remaddr6[i];
  12944. +
  12945. + /* Address is already in the list --- continue */
  12946. + if (rem6->rem6_id == id &&
  12947. + ipv6_addr_equal(&rem6->addr, addr) && rem6->port == port)
  12948. + return 0;
  12949. +
  12950. + /* This may happen when the peer is behind a NAT. It is
  12951. + * trying to JOIN, thus sending the JOIN with a certain ID.
  12952. + * However, the src_addr of the IP packet has been changed. We
  12953. + * update the address in the list, because this is the address as
  12954. + * OUR BOX sees it.
  12955. + */
  12956. + if (rem6->rem6_id == id) {
  12957. + /* update the address */
  12958. + mptcp_debug("%s: updating old addr: %pI6 to addr %pI6 with id:%d\n",
  12959. + __func__, &rem6->addr, addr, id);
  12960. + rem6->addr = *addr;
  12961. + rem6->port = port;
  12962. + mpcb->list_rcvd = 1;
  12963. + return 0;
  12964. + }
  12965. + }
  12966. +
  12967. + i = mptcp_find_free_index(mpcb->rem6_bits);
  12968. + /* Do we already have the maximum number of local/remote addresses? */
  12969. + if (i < 0) {
  12970. + mptcp_debug("%s: At max num of remote addresses: %d --- not adding address: %pI6\n",
  12971. + __func__, MPTCP_MAX_ADDR, addr);
  12972. + return -1;
  12973. + }
  12974. +
  12975. + rem6 = &mpcb->remaddr6[i];
  12976. +
  12977. + /* Address is not known yet, store it */
  12978. + rem6->addr = *addr;
  12979. + rem6->port = port;
  12980. + rem6->bitfield = 0;
  12981. + rem6->retry_bitfield = 0;
  12982. + rem6->rem6_id = id;
  12983. + mpcb->list_rcvd = 1;
  12984. + mpcb->rem6_bits |= (1 << i);
  12985. +
  12986. + return 0;
  12987. +}
  12988. +
  12989. +/* Sets the bitfield of the remote-address field
  12990. + * local address is not set as it will disappear with the global address-list
  12991. + */
  12992. +void mptcp_v6_set_init_addr_bit(struct mptcp_cb *mpcb,
  12993. + const struct in6_addr *daddr, int index)
  12994. +{
  12995. + int i;
  12996. + mptcp_for_each_bit_set(mpcb->rem6_bits, i) {
  12997. + if (ipv6_addr_equal(&mpcb->remaddr6[i].addr, daddr)) {
  12998. + mpcb->remaddr6[i].bitfield |= (1 << index);
  12999. + return;
  13000. + }
  13001. + }
  13002. +}
  13003. +
  13004. +int mptcp_v6_do_rcv(struct sock *meta_sk, struct sk_buff *skb)
  13005. +{
  13006. + struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
  13007. + struct sock *child, *rsk = NULL;
  13008. + int ret;
  13009. +
  13010. + if (!(TCP_SKB_CB(skb)->mptcp_flags & MPTCPHDR_JOIN)) {
  13011. + struct tcphdr *th = tcp_hdr(skb);
  13012. + const struct ipv6hdr *ip6h = ipv6_hdr(skb);
  13013. + struct sock *sk;
  13014. +
  13015. + sk = __inet6_lookup_established(sock_net(meta_sk),
  13016. + &tcp_hashinfo,
  13017. + &ip6h->saddr, th->source,
  13018. + &ip6h->daddr, ntohs(th->dest),
  13019. + inet6_iif(skb));
  13020. +
  13021. + if (!sk) {
  13022. + kfree_skb(skb);
  13023. + return 0;
  13024. + }
  13025. + if (is_meta_sk(sk)) {
  13026. + WARN("%s Did not find a sub-sk - found the meta instead!\n", __func__);
  13027. + kfree_skb(skb);
  13028. + sock_put(sk);
  13029. + return 0;
  13030. + }
  13031. +
  13032. + if (sk->sk_state == TCP_TIME_WAIT) {
  13033. + inet_twsk_put(inet_twsk(sk));
  13034. + kfree_skb(skb);
  13035. + return 0;
  13036. + }
  13037. +
  13038. + ret = tcp_v6_do_rcv(sk, skb);
  13039. + sock_put(sk);
  13040. +
  13041. + return ret;
  13042. + }
  13043. + TCP_SKB_CB(skb)->mptcp_flags = 0;
  13044. +
  13045. + /* Has been removed from the tk-table. Thus, no new subflows.
  13046. + *
  13047. + * Check for close-state is necessary, because we may have been closed
  13048. + * without passing by mptcp_close().
  13049. + *
  13050. + * When falling back, no new subflows are allowed either.
  13051. + */
  13052. + if (meta_sk->sk_state == TCP_CLOSE || !tcp_sk(meta_sk)->inside_tk_table ||
  13053. + mpcb->infinite_mapping_rcv || mpcb->send_infinite_mapping)
  13054. + goto reset_and_discard;
  13055. +
  13056. + child = tcp_v6_hnd_req(meta_sk, skb);
  13057. +
  13058. + if (!child)
  13059. + goto discard;
  13060. +
  13061. + if (child != meta_sk) {
  13062. + sock_rps_save_rxhash(child, skb);
  13063. + /* We don't call tcp_child_process here, because we already
  13064. + * hold the meta-sk-lock and are sure that it is not owned
  13065. + * by the user.
  13066. + */
  13067. + ret = tcp_rcv_state_process(child, skb, tcp_hdr(skb), skb->len);
  13068. + bh_unlock_sock(child);
  13069. + sock_put(child);
  13070. + if (ret) {
  13071. + rsk = child;
  13072. + goto reset_and_discard;
  13073. + }
  13074. + } else {
  13075. + if (tcp_hdr(skb)->syn) {
  13076. + struct mp_join *join_opt = mptcp_find_join(skb);
  13077. + /* Currently we make two calls to mptcp_find_join(). This
  13078. + * can probably be optimized. */
  13079. + if (mptcp_v6_add_raddress(mpcb,
  13080. + (struct in6_addr *)&ipv6_hdr(skb)->saddr,
  13081. + 0,
  13082. + join_opt->addr_id) < 0)
  13083. + goto reset_and_discard;
  13084. + mpcb->list_rcvd = 0;
  13085. +
  13086. + mptcp_v6_join_request(meta_sk, skb);
  13087. + goto discard;
  13088. + }
  13089. + goto reset_and_discard;
  13090. + }
  13091. + return 0;
  13092. +
  13093. +reset_and_discard:
  13094. + tcp_v6_send_reset(rsk, skb);
  13095. +discard:
  13096. + kfree_skb(skb);
  13097. + return 0;
  13098. +}
  13099. +
  13100. +/* After this, the ref count of the meta_sk associated with the request_sock
  13101. + * is incremented. Thus it is the responsibility of the caller
  13102. + * to call sock_put() when the reference is not needed anymore.
  13103. + */
  13104. +struct sock *mptcp_v6_search_req(const __be16 rport, const struct in6_addr *raddr,
  13105. + const struct in6_addr *laddr, const struct net *net)
  13106. +{
  13107. + struct mptcp_request_sock *mtreq;
  13108. + struct sock *meta_sk = NULL;
  13109. +
  13110. + spin_lock(&mptcp_reqsk_hlock);
  13111. + list_for_each_entry(mtreq,
  13112. + &mptcp_reqsk_htb[inet6_synq_hash(raddr, rport, 0,
  13113. + MPTCP_HASH_SIZE)],
  13114. + collide_tuple) {
  13115. + struct inet_request_sock *treq = inet_rsk(rev_mptcp_rsk(mtreq));
  13116. + meta_sk = mtreq->mpcb->meta_sk;
  13117. +
  13118. + if (inet_rsk(rev_mptcp_rsk(mtreq))->ir_rmt_port == rport &&
  13119. + rev_mptcp_rsk(mtreq)->rsk_ops->family == AF_INET6 &&
  13120. + ipv6_addr_equal(&treq->ir_v6_rmt_addr, raddr) &&
  13121. + ipv6_addr_equal(&treq->ir_v6_loc_addr, laddr) &&
  13122. + net_eq(net, sock_net(meta_sk)))
  13123. + break;
  13124. + meta_sk = NULL;
  13125. + }
  13126. +
  13127. + if (meta_sk && unlikely(!atomic_inc_not_zero(&meta_sk->sk_refcnt)))
  13128. + meta_sk = NULL;
  13129. + spin_unlock(&mptcp_reqsk_hlock);
  13130. +
  13131. + return meta_sk;
  13132. +}
  13133. +
  13134. +/* Create a new IPv6 subflow.
  13135. + *
  13136. + * We are in user-context and the meta-sock-lock is held.
  13137. + */
  13138. +int mptcp_init6_subsockets(struct sock *meta_sk, const struct mptcp_loc6 *loc,
  13139. + struct mptcp_rem6 *rem)
  13140. +{
  13141. + struct tcp_sock *tp;
  13142. + struct sock *sk;
  13143. + struct sockaddr_in6 loc_in, rem_in;
  13144. + struct socket sock;
  13145. + int ulid_size = 0, ret;
  13146. +
  13147. + /** First, create and prepare the new socket */
  13148. +
  13149. + sock.type = meta_sk->sk_socket->type;
  13150. + sock.state = SS_UNCONNECTED;
  13151. + sock.wq = meta_sk->sk_socket->wq;
  13152. + sock.file = meta_sk->sk_socket->file;
  13153. + sock.ops = NULL;
  13154. +
  13155. + ret = inet6_create(sock_net(meta_sk), &sock, IPPROTO_TCP, 1);
  13156. + if (unlikely(ret < 0)) {
  13157. + mptcp_debug("%s inet6_create failed ret: %d\n", __func__, ret);
  13158. + return ret;
  13159. + }
  13160. +
  13161. + sk = sock.sk;
  13162. + tp = tcp_sk(sk);
  13163. +
  13164. + /* All subsockets need the MPTCP-lock-class */
  13165. + lockdep_set_class_and_name(&(sk)->sk_lock.slock, &meta_slock_key, "slock-AF_INET-MPTCP");
  13166. + lockdep_init_map(&(sk)->sk_lock.dep_map, "sk_lock-AF_INET-MPTCP", &meta_key, 0);
  13167. +
  13168. + if (mptcp_add_sock(meta_sk, sk, loc->loc6_id, rem->rem6_id, GFP_KERNEL))
  13169. + goto error;
  13170. +
  13171. + tp->mptcp->slave_sk = 1;
  13172. + tp->mptcp->low_prio = loc->low_prio;
  13173. +
  13174. + /* Initializing the timer for an MPTCP subflow */
  13175. + setup_timer(&tp->mptcp->mptcp_ack_timer, mptcp_ack_handler, (unsigned long)sk);
  13176. +
  13177. + /** Then, connect the socket to the peer */
  13178. +
  13179. + ulid_size = sizeof(struct sockaddr_in6);
  13180. + loc_in.sin6_family = AF_INET6;
  13181. + rem_in.sin6_family = AF_INET6;
  13182. + loc_in.sin6_port = 0;
  13183. + if (rem->port)
  13184. + rem_in.sin6_port = rem->port;
  13185. + else
  13186. + rem_in.sin6_port = inet_sk(meta_sk)->inet_dport;
  13187. + loc_in.sin6_addr = loc->addr;
  13188. + rem_in.sin6_addr = rem->addr;
  13189. +
  13190. + ret = sock.ops->bind(&sock, (struct sockaddr *)&loc_in, ulid_size);
  13191. + if (ret < 0) {
  13192. + mptcp_debug("%s: MPTCP subsocket bind() failed, error %d\n",
  13193. + __func__, ret);
  13194. + goto error;
  13195. + }
  13196. +
  13197. + mptcp_debug("%s: token %#x pi %d src_addr:%pI6:%d dst_addr:%pI6:%d\n",
  13198. + __func__, tcp_sk(meta_sk)->mpcb->mptcp_loc_token,
  13199. + tp->mptcp->path_index, &loc_in.sin6_addr,
  13200. + ntohs(loc_in.sin6_port), &rem_in.sin6_addr,
  13201. + ntohs(rem_in.sin6_port));
  13202. +
  13203. + ret = sock.ops->connect(&sock, (struct sockaddr *)&rem_in,
  13204. + ulid_size, O_NONBLOCK);
  13205. + if (ret < 0 && ret != -EINPROGRESS) {
  13206. + mptcp_debug("%s: MPTCP subsocket connect() failed, error %d\n",
  13207. + __func__, ret);
  13208. + goto error;
  13209. + }
  13210. +
  13211. + sk_set_socket(sk, meta_sk->sk_socket);
  13212. + sk->sk_wq = meta_sk->sk_wq;
  13213. +
  13214. + return 0;
  13215. +
  13216. +error:
  13217. + /* May happen if mptcp_add_sock fails first */
  13218. + if (!tp->mpc) {
  13219. + tcp_close(sk, 0);
  13220. + } else {
  13221. + local_bh_disable();
  13222. + mptcp_sub_force_close(sk);
  13223. + local_bh_enable();
  13224. + }
  13225. + return ret;
  13226. +}
  13227. +EXPORT_SYMBOL(mptcp_init6_subsockets);
  13228. +
  13229. +int mptcp_pm_v6_init(void)
  13230. +{
  13231. + int ret = 0;
  13232. + struct request_sock_ops *ops = &mptcp6_request_sock_ops;
  13233. +
  13234. + ops->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", "MPTCP6");
  13235. + if (ops->slab_name == NULL) {
  13236. + ret = -ENOMEM;
  13237. + goto out;
  13238. + }
  13239. +
  13240. + ops->slab = kmem_cache_create(ops->slab_name, ops->obj_size, 0,
  13241. + SLAB_DESTROY_BY_RCU|SLAB_HWCACHE_ALIGN,
  13242. + NULL);
  13243. +
  13244. + if (ops->slab == NULL) {
  13245. + ret = -ENOMEM;
  13246. + goto err_reqsk_create;
  13247. + }
  13248. +
  13249. +out:
  13250. + return ret;
  13251. +
  13252. +err_reqsk_create:
  13253. + kfree(ops->slab_name);
  13254. + ops->slab_name = NULL;
  13255. + goto out;
  13256. +}
  13257. +
  13258. +void mptcp_pm_v6_undo(void)
  13259. +{
  13260. + kmem_cache_destroy(mptcp6_request_sock_ops.slab);
  13261. + kfree(mptcp6_request_sock_ops.slab_name);
  13262. +}
  13263. diff -Nur linux-3.14.45.orig/net/mptcp/mptcp_ndiffports.c linux-3.14.45/net/mptcp/mptcp_ndiffports.c
  13264. --- linux-3.14.45.orig/net/mptcp/mptcp_ndiffports.c 1970-01-01 01:00:00.000000000 +0100
  13265. +++ linux-3.14.45/net/mptcp/mptcp_ndiffports.c 2015-06-24 14:15:48.931862523 +0200
  13266. @@ -0,0 +1,171 @@
  13267. +#include <linux/module.h>
  13268. +
  13269. +#include <net/mptcp.h>
  13270. +#include <net/mptcp_v4.h>
  13271. +
  13272. +#if IS_ENABLED(CONFIG_IPV6)
  13273. +#include <net/mptcp_v6.h>
  13274. +#endif
  13275. +
  13276. +struct ndiffports_priv {
  13277. + /* Worker struct for subflow establishment */
  13278. + struct work_struct subflow_work;
  13279. +
  13280. + struct mptcp_cb *mpcb;
  13281. +};
  13282. +
  13283. +static int sysctl_mptcp_ndiffports __read_mostly = 2;
  13284. +
  13285. +/**
  13286. + * Create all new subflows by calling mptcp_initX_subsockets.
  13287. + *
  13288. + * This function uses a goto to next_subflow, to allow releasing the lock
  13289. + * between the creation of new subflows and to give other processes a chance
  13290. + * to do some work on the socket and potentially finish the communication.
  13291. + **/
  13292. +static void create_subflow_worker(struct work_struct *work)
  13293. +{
  13294. + struct ndiffports_priv *pm_priv = container_of(work,
  13295. + struct ndiffports_priv,
  13296. + subflow_work);
  13297. + struct mptcp_cb *mpcb = pm_priv->mpcb;
  13298. + struct sock *meta_sk = mpcb->meta_sk;
  13299. + int iter = 0;
  13300. +
  13301. +next_subflow:
  13302. + if (iter) {
  13303. + release_sock(meta_sk);
  13304. + mutex_unlock(&mpcb->mpcb_mutex);
  13305. +
  13306. + yield();
  13307. + }
  13308. + mutex_lock(&mpcb->mpcb_mutex);
  13309. + lock_sock_nested(meta_sk, SINGLE_DEPTH_NESTING);
  13310. +
  13311. + iter++;
  13312. +
  13313. + if (sock_flag(meta_sk, SOCK_DEAD))
  13314. + goto exit;
  13315. +
  13316. + if (mpcb->master_sk &&
  13317. + !tcp_sk(mpcb->master_sk)->mptcp->fully_established)
  13318. + goto exit;
  13319. +
  13320. + if (sysctl_mptcp_ndiffports > iter &&
  13321. + sysctl_mptcp_ndiffports > mpcb->cnt_subflows) {
  13322. + if (meta_sk->sk_family == AF_INET ||
  13323. + mptcp_v6_is_v4_mapped(meta_sk)) {
  13324. + struct mptcp_loc4 loc;
  13325. +
  13326. + loc.addr.s_addr = inet_sk(meta_sk)->inet_saddr;
  13327. + loc.loc4_id = 0;
  13328. + loc.low_prio = 0;
  13329. +
  13330. + mptcp_init4_subsockets(meta_sk, &loc, &mpcb->remaddr4[0]);
  13331. + } else {
  13332. +#if IS_ENABLED(CONFIG_IPV6)
  13333. + struct mptcp_loc6 loc;
  13334. +
  13335. + loc.addr = inet6_sk(meta_sk)->saddr;
  13336. + loc.loc6_id = 0;
  13337. + loc.low_prio = 0;
  13338. +
  13339. + mptcp_init6_subsockets(meta_sk, &loc, &mpcb->remaddr6[0]);
  13340. +#endif
  13341. + }
  13342. + goto next_subflow;
  13343. + }
  13344. +
  13345. +exit:
  13346. + release_sock(meta_sk);
  13347. + mutex_unlock(&mpcb->mpcb_mutex);
  13348. + sock_put(meta_sk);
  13349. +}
  13350. +
  13351. +static void ndiffports_new_session(struct sock *meta_sk, int index)
  13352. +{
  13353. + struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
  13354. + struct ndiffports_priv *fmp = (struct ndiffports_priv *)&mpcb->mptcp_pm[0];
  13355. +
  13356. + /* Initialize workqueue-struct */
  13357. + INIT_WORK(&fmp->subflow_work, create_subflow_worker);
  13358. + fmp->mpcb = mpcb;
  13359. +}
  13360. +
  13361. +static void ndiffports_create_subflows(struct sock *meta_sk)
  13362. +{
  13363. + struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
  13364. + struct ndiffports_priv *pm_priv = (struct ndiffports_priv *)&mpcb->mptcp_pm[0];
  13365. +
  13366. + if (mpcb->infinite_mapping_snd || mpcb->infinite_mapping_rcv ||
  13367. + mpcb->send_infinite_mapping ||
  13368. + mpcb->server_side || sock_flag(meta_sk, SOCK_DEAD))
  13369. + return;
  13370. +
  13371. + if (!work_pending(&pm_priv->subflow_work)) {
  13372. + sock_hold(meta_sk);
  13373. + queue_work(mptcp_wq, &pm_priv->subflow_work);
  13374. + }
  13375. +}
  13376. +
  13377. +static int ndiffports_get_local_index(sa_family_t family, union inet_addr *addr,
  13378. + struct net *net)
  13379. +{
  13380. + return 0;
  13381. +}
  13382. +
  13383. +static struct mptcp_pm_ops ndiffports __read_mostly = {
  13384. + .new_session = ndiffports_new_session,
  13385. + .fully_established = ndiffports_create_subflows,
  13386. + .get_local_index = ndiffports_get_local_index,
  13387. + .get_local_id = ndiffports_get_local_index,
  13388. + .name = "ndiffports",
  13389. + .owner = THIS_MODULE,
  13390. +};
  13391. +
  13392. +static struct ctl_table ndiff_table[] = {
  13393. + {
  13394. + .procname = "mptcp_ndiffports",
  13395. + .data = &sysctl_mptcp_ndiffports,
  13396. + .maxlen = sizeof(int),
  13397. + .mode = 0644,
  13398. + .proc_handler = &proc_dointvec
  13399. + },
  13400. + { }
  13401. +};
  13402. +
  13403. +struct ctl_table_header *mptcp_sysctl;
  13404. +
  13405. +/* General initialization of MPTCP_PM */
  13406. +static int __init ndiffports_register(void)
  13407. +{
  13408. + BUILD_BUG_ON(sizeof(struct ndiffports_priv) > MPTCP_PM_SIZE);
  13409. +
  13410. + mptcp_sysctl = register_net_sysctl(&init_net, "net/mptcp", ndiff_table);
  13411. + if (!mptcp_sysctl)
  13412. + goto exit;
  13413. +
  13414. + if (mptcp_register_path_manager(&ndiffports))
  13415. + goto pm_failed;
  13416. +
  13417. + return 0;
  13418. +
  13419. +pm_failed:
  13420. + unregister_net_sysctl_table(mptcp_sysctl);
  13421. +exit:
  13422. + return -1;
  13423. +}
  13424. +
  13425. +static void ndiffports_unregister(void)
  13426. +{
  13427. + mptcp_unregister_path_manager(&ndiffports);
  13428. + unregister_net_sysctl_table(mptcp_sysctl);
  13429. +}
  13430. +
  13431. +module_init(ndiffports_register);
  13432. +module_exit(ndiffports_unregister);
  13433. +
  13434. +MODULE_AUTHOR("Christoph Paasch");
  13435. +MODULE_LICENSE("GPL");
  13436. +MODULE_DESCRIPTION("NDIFF-PORTS MPTCP");
  13437. +MODULE_VERSION("0.88");
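As an aside (an illustrative sketch, not code from the patch): the ndiffports path manager opens sysctl_mptcp_ndiffports subflows between the one local and the one remote address the connection already uses, so the extra subflows differ only in their source port, hence the name. A minimal userspace analogue of that idea, N parallel TCP connections that differ only in their local port, could look as follows; the destination address, ports and loop bound are placeholders, not values taken from the patch.

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	const char *dst_ip = "192.0.2.1";	/* placeholder destination */
	const int dst_port = 8080;		/* placeholder port */
	enum { NDIFFPORTS = 2 };		/* mirrors the sysctl default */
	int fds[NDIFFPORTS];

	for (int i = 0; i < NDIFFPORTS; i++) {
		struct sockaddr_in local = { 0 };
		struct sockaddr_in remote = { 0 };

		fds[i] = socket(AF_INET, SOCK_STREAM, 0);
		if (fds[i] < 0) {
			perror("socket");
			return 1;
		}

		/* Bind each connection to a different local port; the kernel
		 * module instead lets the stack pick an ephemeral port for
		 * every additional subflow, which amounts to the same thing. */
		local.sin_family = AF_INET;
		local.sin_addr.s_addr = htonl(INADDR_ANY);
		local.sin_port = htons(50000 + i);
		if (bind(fds[i], (struct sockaddr *)&local, sizeof(local)) < 0)
			perror("bind");

		remote.sin_family = AF_INET;
		remote.sin_port = htons(dst_port);
		inet_pton(AF_INET, dst_ip, &remote.sin_addr);
		if (connect(fds[i], (struct sockaddr *)&remote, sizeof(remote)) < 0)
			perror("connect");
	}

	for (int i = 0; i < NDIFFPORTS; i++)
		close(fds[i]);
	return 0;
}

With the real module, the only knob is the sysctl registered above, e.g. sysctl -w net.mptcp.mptcp_ndiffports=4 raises the target number of subflows from the default of 2.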
  13438. diff -Nur linux-3.14.45.orig/net/mptcp/mptcp_ofo_queue.c linux-3.14.45/net/mptcp/mptcp_ofo_queue.c
  13439. --- linux-3.14.45.orig/net/mptcp/mptcp_ofo_queue.c 1970-01-01 01:00:00.000000000 +0100
  13440. +++ linux-3.14.45/net/mptcp/mptcp_ofo_queue.c 2015-06-24 14:15:48.931862523 +0200
  13441. @@ -0,0 +1,278 @@
  13442. +/*
  13443. + * MPTCP implementation - Fast algorithm for MPTCP meta-reordering
  13444. + *
  13445. + * Initial Design & Implementation:
  13446. + * Sébastien Barré <sebastien.barre@uclouvain.be>
  13447. + *
  13448. + * Current Maintainer & Author:
  13449. + * Christoph Paasch <christoph.paasch@uclouvain.be>
  13450. + *
  13451. + * Additional authors:
  13452. + * Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
  13453. + * Gregory Detal <gregory.detal@uclouvain.be>
  13454. + * Fabien Duchêne <fabien.duchene@uclouvain.be>
  13455. + * Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
  13456. + * Lavkesh Lahngir <lavkesh51@gmail.com>
  13457. + * Andreas Ripke <ripke@neclab.eu>
  13458. + * Vlad Dogaru <vlad.dogaru@intel.com>
  13459. + * Octavian Purdila <octavian.purdila@intel.com>
  13460. + * John Ronan <jronan@tssg.org>
  13461. + * Catalin Nicutar <catalin.nicutar@gmail.com>
  13462. + * Brandon Heller <brandonh@stanford.edu>
  13463. + *
  13464. + * This program is free software; you can redistribute it and/or
  13465. + * modify it under the terms of the GNU General Public License
  13466. + * as published by the Free Software Foundation; either version
  13467. + * 2 of the License, or (at your option) any later version.
  13468. + */
  13469. +
  13470. +#include <linux/skbuff.h>
  13471. +#include <linux/slab.h>
  13472. +#include <net/tcp.h>
  13473. +#include <net/mptcp.h>
  13474. +
  13475. +void mptcp_remove_shortcuts(const struct mptcp_cb *mpcb,
  13476. + const struct sk_buff *skb)
  13477. +{
  13478. + struct tcp_sock *tp;
  13479. +
  13480. + mptcp_for_each_tp(mpcb, tp) {
  13481. + if (tp->mptcp->shortcut_ofoqueue == skb) {
  13482. + tp->mptcp->shortcut_ofoqueue = NULL;
  13483. + return;
  13484. + }
  13485. + }
  13486. +}
  13487. +
  13488. +/* Does 'skb' fit after 'here' in the queue 'head'?
  13489. + * If yes, we queue it and return 1
  13490. + */
  13491. +static int mptcp_ofo_queue_after(struct sk_buff_head *head,
  13492. + struct sk_buff *skb, struct sk_buff *here,
  13493. + struct tcp_sock *tp)
  13494. +{
  13495. + struct sock *meta_sk = tp->meta_sk;
  13496. + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
  13497. + u32 seq = TCP_SKB_CB(skb)->seq;
  13498. + u32 end_seq = TCP_SKB_CB(skb)->end_seq;
  13499. +
  13500. + /* We want to queue skb after here, thus we need seq >= here->end_seq */
  13501. + if (before(seq, TCP_SKB_CB(here)->end_seq))
  13502. + return 0;
  13503. +
  13504. + if (seq == TCP_SKB_CB(here)->end_seq) {
  13505. + bool fragstolen = false;
  13506. +
  13507. + if (!tcp_try_coalesce(meta_sk, here, skb, &fragstolen)) {
  13508. + __skb_queue_after(&meta_tp->out_of_order_queue, here, skb);
  13509. + return 1;
  13510. + } else {
  13511. + kfree_skb_partial(skb, fragstolen);
  13512. + return -1;
  13513. + }
  13514. + }
  13515. +
  13516. + /* If here is the last one, we can always queue it */
  13517. + if (skb_queue_is_last(head, here)) {
  13518. + __skb_queue_after(head, here, skb);
  13519. + return 1;
  13520. + } else {
  13521. + struct sk_buff *skb1 = skb_queue_next(head, here);
  13522. + /* It's not the last one, but does it fit between 'here' and
  13523. + * the one after 'here'? That is, does end_seq <= after_here->seq
  13524. + */
  13525. + if (!after(end_seq, TCP_SKB_CB(skb1)->seq)) {
  13526. + __skb_queue_after(head, here, skb);
  13527. + return 1;
  13528. + }
  13529. + }
  13530. +
  13531. + return 0;
  13532. +}
  13533. +
  13534. +static void try_shortcut(struct sk_buff *shortcut, struct sk_buff *skb,
  13535. + struct sk_buff_head *head, struct tcp_sock *tp)
  13536. +{
  13537. + struct sock *meta_sk = tp->meta_sk;
  13538. + struct tcp_sock *tp_it, *meta_tp = tcp_sk(meta_sk);
  13539. + struct mptcp_cb *mpcb = meta_tp->mpcb;
  13540. + struct sk_buff *skb1, *best_shortcut = NULL;
  13541. + u32 seq = TCP_SKB_CB(skb)->seq;
  13542. + u32 end_seq = TCP_SKB_CB(skb)->end_seq;
  13543. + u32 distance = 0xffffffff;
  13544. +
  13545. + /* First, check the tp's shortcut */
  13546. + if (!shortcut) {
  13547. + if (skb_queue_empty(head)) {
  13548. + __skb_queue_head(head, skb);
  13549. + goto end;
  13550. + }
  13551. + } else {
  13552. + int ret = mptcp_ofo_queue_after(head, skb, shortcut, tp);
  13553. + /* Is the tp's shortcut a hit? If yes, we insert. */
  13554. +
  13555. + if (ret) {
  13556. + skb = (ret > 0) ? skb : NULL;
  13557. + goto end;
  13558. + }
  13559. + }
  13560. +
  13561. + /* Check the shortcuts of the other subsockets. */
  13562. + mptcp_for_each_tp(mpcb, tp_it) {
  13563. + shortcut = tp_it->mptcp->shortcut_ofoqueue;
  13564. + /* Can we queue it here? If yes, do so! */
  13565. + if (shortcut) {
  13566. + int ret = mptcp_ofo_queue_after(head, skb, shortcut, tp);
  13567. +
  13568. + if (ret) {
  13569. + skb = (ret > 0) ? skb : NULL;
  13570. + goto end;
  13571. + }
  13572. + }
  13573. +
  13574. + /* Could not queue it, check if we are close.
  13575. + * We are looking for a shortcut, close enough to seq to
  13576. + * set skb1 prematurely and thus improve the subsequent lookup,
  13577. + * which tries to find a skb1 so that skb1->seq <= seq.
  13578. + *
  13579. + * So, here we only take shortcuts whose shortcut->seq > seq, minimize
  13580. + * the distance between shortcut->seq and seq, and set best_shortcut to
  13581. + * the one with the minimal distance.
  13582. + *
  13583. + * That way, the subsequent while-loop is as short as possible.
  13584. + */
  13585. + if (shortcut && after(TCP_SKB_CB(shortcut)->seq, seq)) {
  13586. + /* Are we closer than the current best shortcut? */
  13587. + if ((u32)(TCP_SKB_CB(shortcut)->seq - seq) < distance) {
  13588. + distance = (u32)(TCP_SKB_CB(shortcut)->seq - seq);
  13589. + best_shortcut = shortcut;
  13590. + }
  13591. + }
  13592. + }
  13593. +
  13594. + if (best_shortcut)
  13595. + skb1 = best_shortcut;
  13596. + else
  13597. + skb1 = skb_peek_tail(head);
  13598. +
  13599. + if (seq == TCP_SKB_CB(skb1)->end_seq) {
  13600. + bool fragstolen = false;
  13601. +
  13602. + if (!tcp_try_coalesce(meta_sk, skb1, skb, &fragstolen)) {
  13603. + __skb_queue_after(&meta_tp->out_of_order_queue, skb1, skb);
  13604. + } else {
  13605. + kfree_skb_partial(skb, fragstolen);
  13606. + skb = NULL;
  13607. + }
  13608. +
  13609. + goto end;
  13610. + }
  13611. +
  13612. + /* Find the insertion point, starting from best_shortcut if available.
  13613. + *
  13614. + * Inspired from tcp_data_queue_ofo.
  13615. + */
  13616. + while (1) {
  13617. + /* skb1->seq <= seq */
  13618. + if (!after(TCP_SKB_CB(skb1)->seq, seq))
  13619. + break;
  13620. + if (skb_queue_is_first(head, skb1)) {
  13621. + skb1 = NULL;
  13622. + break;
  13623. + }
  13624. + skb1 = skb_queue_prev(head, skb1);
  13625. + }
  13626. +
  13627. + /* Does skb overlap the previous one? */
  13628. + if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
  13629. + if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
  13630. + /* All the bits are present. */
  13631. + __kfree_skb(skb);
  13632. + skb = NULL;
  13633. + goto end;
  13634. + }
  13635. + if (seq == TCP_SKB_CB(skb1)->seq) {
  13636. + if (skb_queue_is_first(head, skb1))
  13637. + skb1 = NULL;
  13638. + else
  13639. + skb1 = skb_queue_prev(head, skb1);
  13640. + }
  13641. + }
  13642. + if (!skb1)
  13643. + __skb_queue_head(head, skb);
  13644. + else
  13645. + __skb_queue_after(head, skb1, skb);
  13646. +
  13647. + /* And clean segments covered by new one as whole. */
  13648. + while (!skb_queue_is_last(head, skb)) {
  13649. + skb1 = skb_queue_next(head, skb);
  13650. +
  13651. + if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
  13652. + break;
  13653. +
  13654. + __skb_unlink(skb1, head);
  13655. + mptcp_remove_shortcuts(mpcb, skb1);
  13656. + __kfree_skb(skb1);
  13657. + }
  13658. +
  13659. +end:
  13660. + if (skb) {
  13661. + skb_set_owner_r(skb, meta_sk);
  13662. + tp->mptcp->shortcut_ofoqueue = skb;
  13663. + }
  13664. +
  13665. + return;
  13666. +}
  13667. +
  13668. +/**
  13669. + * @sk: the subflow that received this skb.
  13670. + */
  13671. +void mptcp_add_meta_ofo_queue(struct sock *meta_sk, struct sk_buff *skb,
  13672. + struct sock *sk)
  13673. +{
  13674. + struct tcp_sock *tp = tcp_sk(sk);
  13675. +
  13676. + try_shortcut(tp->mptcp->shortcut_ofoqueue, skb,
  13677. + &tcp_sk(meta_sk)->out_of_order_queue, tp);
  13678. +}
  13679. +
  13680. +void mptcp_ofo_queue(struct sock *meta_sk)
  13681. +{
  13682. + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
  13683. + struct sk_buff *skb;
  13684. +
  13685. + while ((skb = skb_peek(&meta_tp->out_of_order_queue)) != NULL) {
  13686. + u32 old_rcv_nxt = meta_tp->rcv_nxt;
  13687. + if (after(TCP_SKB_CB(skb)->seq, meta_tp->rcv_nxt))
  13688. + break;
  13689. +
  13690. + if (!after(TCP_SKB_CB(skb)->end_seq, meta_tp->rcv_nxt)) {
  13691. + __skb_unlink(skb, &meta_tp->out_of_order_queue);
  13692. + mptcp_remove_shortcuts(meta_tp->mpcb, skb);
  13693. + __kfree_skb(skb);
  13694. + continue;
  13695. + }
  13696. +
  13697. + __skb_unlink(skb, &meta_tp->out_of_order_queue);
  13698. + mptcp_remove_shortcuts(meta_tp->mpcb, skb);
  13699. +
  13700. + __skb_queue_tail(&meta_sk->sk_receive_queue, skb);
  13701. + meta_tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
  13702. + mptcp_check_rcvseq_wrap(meta_tp, old_rcv_nxt);
  13703. +
  13704. + if (tcp_hdr(skb)->fin)
  13705. + mptcp_fin(meta_sk);
  13706. + }
  13707. +}
  13708. +
  13709. +void mptcp_purge_ofo_queue(struct tcp_sock *meta_tp)
  13710. +{
  13711. + struct sk_buff_head *head = &meta_tp->out_of_order_queue;
  13712. + struct sk_buff *skb, *tmp;
  13713. +
  13714. + skb_queue_walk_safe(head, skb, tmp) {
  13715. + __skb_unlink(skb, head);
  13716. + mptcp_remove_shortcuts(meta_tp->mpcb, skb);
  13717. + kfree_skb(skb);
  13718. + }
  13719. +}
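The file above keeps a single out-of-order queue at the meta level and, per subflow, a cached shortcut pointer (shortcut_ofoqueue) to the last segment that subflow queued there. Insertion first tries the spot right next to the shortcut (and the other subflows' shortcuts) before falling back to the backward scan borrowed from tcp_data_queue_ofo(). The following stand-alone sketch, with hypothetical names and without the overlap trimming the real code performs, shows the caching idea: segments arriving in-order on one subflow land next to each other, so they are inserted in O(1) instead of walking the whole shared queue.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct seg {
	uint32_t seq, end_seq;
	struct seg *prev, *next;
};

struct ofo_queue {
	struct seg *head, *tail;
};

static bool fits_after(const struct seg *here, const struct seg *s)
{
	/* 's' fits directly after 'here' iff here->end_seq <= s->seq and it
	 * does not overlap the segment following 'here'. */
	return (int32_t)(s->seq - here->end_seq) >= 0 &&
	       (!here->next || (int32_t)(here->next->seq - s->end_seq) >= 0);
}

static void insert_after(struct ofo_queue *q, struct seg *here, struct seg *s)
{
	s->prev = here;
	s->next = here->next;
	if (here->next)
		here->next->prev = s;
	else
		q->tail = s;
	here->next = s;
}

/* 'shortcut' is the per-subflow cached segment; may point to NULL. */
static void ofo_insert(struct ofo_queue *q, struct seg **shortcut, struct seg *s)
{
	struct seg *here;

	if (!q->head) {
		q->head = q->tail = s;
		*shortcut = s;
		return;
	}
	if (*shortcut && fits_after(*shortcut, s)) {
		insert_after(q, *shortcut, s);
		*shortcut = s;
		return;
	}
	/* Fallback: walk backwards from the tail (overlap handling omitted). */
	for (here = q->tail; here; here = here->prev) {
		if ((int32_t)(s->seq - here->end_seq) >= 0) {
			insert_after(q, here, s);
			*shortcut = s;
			return;
		}
	}
	s->prev = NULL;
	s->next = q->head;
	q->head->prev = s;
	q->head = s;
	*shortcut = s;
}

int main(void)
{
	struct ofo_queue q = { 0 };
	struct seg *shortcut = NULL;
	struct seg a = { 100, 200 }, b = { 300, 400 }, c = { 200, 300 };

	ofo_insert(&q, &shortcut, &a);
	ofo_insert(&q, &shortcut, &b);
	ofo_insert(&q, &shortcut, &c);	/* fills the hole between a and b */

	for (struct seg *s = q.head; s; s = s->next)
		printf("[%u, %u) ", (unsigned)s->seq, (unsigned)s->end_seq);
	printf("\n");
	return 0;
}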
  13720. diff -Nur linux-3.14.45.orig/net/mptcp/mptcp_olia.c linux-3.14.45/net/mptcp/mptcp_olia.c
  13721. --- linux-3.14.45.orig/net/mptcp/mptcp_olia.c 1970-01-01 01:00:00.000000000 +0100
  13722. +++ linux-3.14.45/net/mptcp/mptcp_olia.c 2015-06-24 14:15:48.931862523 +0200
  13723. @@ -0,0 +1,314 @@
  13724. +/*
  13725. + * MPTCP implementation - OPPORTUNISTIC LINKED INCREASES CONGESTION CONTROL:
  13726. + *
  13727. + * Algorithm design:
  13728. + * Ramin Khalili <ramin.khalili@epfl.ch>
  13729. + * Nicolas Gast <nicolas.gast@epfl.ch>
  13730. + * Jean-Yves Le Boudec <jean-yves.leboudec@epfl.ch>
  13731. + *
  13732. + * Implementation:
  13733. + * Ramin Khalili <ramin.khalili@epfl.ch>
  13734. + *
  13735. + * Ported to the official MPTCP-kernel:
  13736. + * Christoph Paasch <christoph.paasch@uclouvain.be>
  13737. + *
  13738. + * This program is free software; you can redistribute it and/or
  13739. + * modify it under the terms of the GNU General Public License
  13740. + * as published by the Free Software Foundation; either version
  13741. + * 2 of the License, or (at your option) any later version.
  13742. + */
  13743. +
  13744. +
  13745. +#include <net/tcp.h>
  13746. +#include <net/mptcp.h>
  13747. +
  13748. +#include <linux/module.h>
  13749. +
  13750. +static int scale = 10;
  13751. +
  13752. +struct mptcp_olia {
  13753. + u32 mptcp_loss1;
  13754. + u32 mptcp_loss2;
  13755. + u32 mptcp_loss3;
  13756. + int epsilon_num;
  13757. + u32 epsilon_den;
  13758. + int mptcp_snd_cwnd_cnt;
  13759. +};
  13760. +
  13761. +static inline int mptcp_olia_sk_can_send(const struct sock *sk)
  13762. +{
  13763. + return mptcp_sk_can_send(sk) && tcp_sk(sk)->srtt;
  13764. +}
  13765. +
  13766. +static inline u64 mptcp_olia_scale(u64 val, int scale)
  13767. +{
  13768. + return (u64) val << scale;
  13769. +}
  13770. +
  13771. +/* Take care of the artificial inflation of cwnd (see RFC 5681)
  13772. + * during the fast-retransmit phase.
  13773. + */
  13774. +static u32 mptcp_get_crt_cwnd(struct sock *sk)
  13775. +{
  13776. + struct inet_connection_sock *icsk = inet_csk(sk);
  13777. +
  13778. + if (icsk->icsk_ca_state == TCP_CA_Recovery)
  13779. + return tcp_sk(sk)->snd_ssthresh;
  13780. + else
  13781. + return tcp_sk(sk)->snd_cwnd;
  13782. +}
  13783. +
  13784. +/* Return the denominator of the first term of the increase term. */
  13785. +static u64 mptcp_get_rate(struct mptcp_cb *mpcb , u32 path_rtt)
  13786. +{
  13787. + struct sock *sk;
  13788. + u64 rate = 1; /* We have to avoid a zero-rate because it is used as a divisor */
  13789. +
  13790. + mptcp_for_each_sk(mpcb, sk) {
  13791. + struct tcp_sock *tp = tcp_sk(sk);
  13792. + u64 scaled_num;
  13793. + u32 tmp_cwnd;
  13794. +
  13795. + if (!mptcp_olia_sk_can_send(sk))
  13796. + continue;
  13797. +
  13798. + tmp_cwnd = mptcp_get_crt_cwnd(sk);
  13799. + scaled_num = mptcp_olia_scale(tmp_cwnd, scale) * path_rtt;
  13800. + rate += div_u64(scaled_num , tp->srtt);
  13801. + }
  13802. + rate *= rate;
  13803. + return rate;
  13804. +}
  13805. +
  13806. +/* find the maximum cwnd, used to find set M */
  13807. +static u32 mptcp_get_max_cwnd(struct mptcp_cb *mpcb)
  13808. +{
  13809. + struct sock *sk;
  13810. + u32 best_cwnd = 0;
  13811. +
  13812. + mptcp_for_each_sk(mpcb, sk) {
  13813. + u32 tmp_cwnd;
  13814. +
  13815. + if (!mptcp_olia_sk_can_send(sk))
  13816. + continue;
  13817. +
  13818. + tmp_cwnd = mptcp_get_crt_cwnd(sk);
  13819. + if (tmp_cwnd > best_cwnd)
  13820. + best_cwnd = tmp_cwnd;
  13821. + }
  13822. + return best_cwnd;
  13823. +}
  13824. +
  13825. +static void mptcp_get_epsilon(struct mptcp_cb *mpcb)
  13826. +{
  13827. + struct mptcp_olia *ca;
  13828. + struct tcp_sock *tp;
  13829. + struct sock *sk;
  13830. + u64 tmp_int, tmp_rtt, best_int = 0, best_rtt = 1;
  13831. + u32 max_cwnd = 1, best_cwnd = 1, tmp_cwnd;
  13832. + u8 M = 0, B_not_M = 0;
  13833. +
  13834. + /* TODO - integrate this in the following loop - we just want to iterate once */
  13835. +
  13836. + max_cwnd = mptcp_get_max_cwnd(mpcb);
  13837. +
  13838. + /* find the best path */
  13839. + mptcp_for_each_sk(mpcb, sk) {
  13840. + tp = tcp_sk(sk);
  13841. + ca = inet_csk_ca(sk);
  13842. +
  13843. + if (!mptcp_olia_sk_can_send(sk))
  13844. + continue;
  13845. +
  13846. + tmp_rtt = tp->srtt * tp->srtt;
  13847. + /* TODO - check here and rename variables */
  13848. + tmp_int = max(ca->mptcp_loss3 - ca->mptcp_loss2,
  13849. + ca->mptcp_loss2 - ca->mptcp_loss1);
  13850. +
  13851. + tmp_cwnd = mptcp_get_crt_cwnd(sk);
  13852. + if (tmp_int * best_rtt >= best_int * tmp_rtt) {
  13853. + best_rtt = tmp_rtt;
  13854. + best_int = tmp_int;
  13855. + best_cwnd = tmp_cwnd;
  13856. + }
  13857. + }
  13858. +
  13859. + /* TODO - integrate this here in mptcp_get_max_cwnd and in the previous loop */
  13860. + /* find the size of M and B_not_M */
  13861. + mptcp_for_each_sk(mpcb, sk) {
  13862. + tp = tcp_sk(sk);
  13863. + ca = inet_csk_ca(sk);
  13864. +
  13865. + if (!mptcp_olia_sk_can_send(sk))
  13866. + continue;
  13867. +
  13868. + tmp_cwnd = mptcp_get_crt_cwnd(sk);
  13869. + if (tmp_cwnd == max_cwnd) {
  13870. + M++;
  13871. + } else {
  13872. + tmp_rtt = tp->srtt * tp->srtt;
  13873. + tmp_int = max(ca->mptcp_loss3 - ca->mptcp_loss2,
  13874. + ca->mptcp_loss2 - ca->mptcp_loss1);
  13875. +
  13876. + if (tmp_int * best_rtt == best_int * tmp_rtt)
  13877. + B_not_M++;
  13878. + }
  13879. + }
  13880. +
  13881. + /* check if the path is in M or B_not_M and set the value of epsilon accordingly */
  13882. + mptcp_for_each_sk(mpcb, sk) {
  13883. + tp = tcp_sk(sk);
  13884. + ca = inet_csk_ca(sk);
  13885. +
  13886. + if (!mptcp_olia_sk_can_send(sk))
  13887. + continue;
  13888. +
  13889. + if (B_not_M == 0) {
  13890. + ca->epsilon_num = 0;
  13891. + ca->epsilon_den = 1;
  13892. + } else {
  13893. + tmp_rtt = tp->srtt * tp->srtt;
  13894. + tmp_int = max(ca->mptcp_loss3 - ca->mptcp_loss2,
  13895. + ca->mptcp_loss2 - ca->mptcp_loss1);
  13896. + tmp_cwnd = mptcp_get_crt_cwnd(sk);
  13897. +
  13898. + if (tmp_cwnd < max_cwnd &&
  13899. + tmp_int * best_rtt == best_int * tmp_rtt){
  13900. + ca->epsilon_num = 1;
  13901. + ca->epsilon_den = mpcb->cnt_established * B_not_M;
  13902. + } else if (tmp_cwnd == max_cwnd) {
  13903. + ca->epsilon_num = -1;
  13904. + ca->epsilon_den = mpcb->cnt_established * M;
  13905. + } else {
  13906. + ca->epsilon_num = 0;
  13907. + ca->epsilon_den = 1;
  13908. + }
  13909. + }
  13910. + }
  13911. +
  13912. +}
  13913. +
  13914. +/* setting the initial values */
  13915. +static void mptcp_olia_init(struct sock *sk)
  13916. +{
  13917. + struct tcp_sock *tp = tcp_sk(sk);
  13918. + struct mptcp_olia *ca = inet_csk_ca(sk);
  13919. +
  13920. + if (tp->mpc) {
  13921. + ca->mptcp_loss1 = tp->snd_una;
  13922. + ca->mptcp_loss2 = tp->snd_una;
  13923. + ca->mptcp_loss3 = tp->snd_una;
  13924. + ca->mptcp_snd_cwnd_cnt = 0;
  13925. + ca->epsilon_num = 0;
  13926. + ca->epsilon_den = 1;
  13927. + }
  13928. +}
  13929. +
  13930. +/* updating inter-loss distance and ssthresh */
  13931. +static void mptcp_olia_set_state(struct sock *sk, u8 new_state)
  13932. +{
  13933. + if (!tcp_sk(sk)->mpc)
  13934. + return;
  13935. +
  13936. + if (new_state == TCP_CA_Loss ||
  13937. + new_state == TCP_CA_Recovery || new_state == TCP_CA_CWR) {
  13938. + struct mptcp_olia *ca = inet_csk_ca(sk);
  13939. +
  13940. + if (ca->mptcp_loss3 != ca->mptcp_loss2 &&
  13941. + !inet_csk(sk)->icsk_retransmits) {
  13942. + ca->mptcp_loss1 = ca->mptcp_loss2;
  13943. + ca->mptcp_loss2 = ca->mptcp_loss3;
  13944. + }
  13945. + }
  13946. +
  13947. +}
  13948. +
  13949. +/* main algorithm */
  13950. +static void mptcp_olia_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight)
  13951. +{
  13952. + struct tcp_sock *tp = tcp_sk(sk);
  13953. + struct mptcp_olia *ca = inet_csk_ca(sk);
  13954. + struct mptcp_cb *mpcb = tp->mpcb;
  13955. +
  13956. + u64 inc_num, inc_den, rate, cwnd_scaled;
  13957. +
  13958. + if (!tp->mpc) {
  13959. + tcp_reno_cong_avoid(sk, ack, acked, in_flight);
  13960. + return;
  13961. + }
  13962. +
  13963. + ca->mptcp_loss3 = tp->snd_una;
  13964. +
  13965. + if (!tcp_is_cwnd_limited(sk, in_flight))
  13966. + return;
  13967. +
  13968. + /* slow start if it is in the safe area */
  13969. + if (tp->snd_cwnd <= tp->snd_ssthresh) {
  13970. + tcp_slow_start(tp, acked);
  13971. + return;
  13972. + }
  13973. +
  13974. + mptcp_get_epsilon(mpcb);
  13975. + rate = mptcp_get_rate(mpcb, tp->srtt);
  13976. + cwnd_scaled = mptcp_olia_scale(tp->snd_cwnd, scale);
  13977. + inc_den = ca->epsilon_den * tp->snd_cwnd * rate ? : 1;
  13978. +
  13979. + /* calculate the increasing term, scaling is used to reduce the rounding effect */
  13980. + if (ca->epsilon_num == -1) {
  13981. + if (ca->epsilon_den * cwnd_scaled * cwnd_scaled < rate) {
  13982. + inc_num = rate - ca->epsilon_den *
  13983. + cwnd_scaled * cwnd_scaled;
  13984. + ca->mptcp_snd_cwnd_cnt -= div64_u64(
  13985. + mptcp_olia_scale(inc_num , scale) , inc_den);
  13986. + } else {
  13987. + inc_num = ca->epsilon_den *
  13988. + cwnd_scaled * cwnd_scaled - rate;
  13989. + ca->mptcp_snd_cwnd_cnt += div64_u64(
  13990. + mptcp_olia_scale(inc_num , scale) , inc_den);
  13991. + }
  13992. + } else {
  13993. + inc_num = ca->epsilon_num * rate +
  13994. + ca->epsilon_den * cwnd_scaled * cwnd_scaled;
  13995. + ca->mptcp_snd_cwnd_cnt += div64_u64(
  13996. + mptcp_olia_scale(inc_num , scale) , inc_den);
  13997. + }
  13998. +
  13999. +
  14000. + if (ca->mptcp_snd_cwnd_cnt >= (1 << scale) - 1) {
  14001. + if (tp->snd_cwnd < tp->snd_cwnd_clamp)
  14002. + tp->snd_cwnd++;
  14003. + ca->mptcp_snd_cwnd_cnt = 0;
  14004. + } else if (ca->mptcp_snd_cwnd_cnt <= 0 - (1 << scale) + 1) {
  14005. + tp->snd_cwnd = max((int) 1 , (int) tp->snd_cwnd - 1);
  14006. + ca->mptcp_snd_cwnd_cnt = 0;
  14007. + }
  14008. +}
  14009. +
  14010. +static struct tcp_congestion_ops mptcp_olia = {
  14011. + .init = mptcp_olia_init,
  14012. + .ssthresh = tcp_reno_ssthresh,
  14013. + .cong_avoid = mptcp_olia_cong_avoid,
  14014. + .set_state = mptcp_olia_set_state,
  14015. + .min_cwnd = tcp_reno_min_cwnd,
  14016. + .owner = THIS_MODULE,
  14017. + .name = "olia",
  14018. +};
  14019. +
  14020. +static int __init mptcp_olia_register(void)
  14021. +{
  14022. + BUILD_BUG_ON(sizeof(struct mptcp_olia) > ICSK_CA_PRIV_SIZE);
  14023. + return tcp_register_congestion_control(&mptcp_olia);
  14024. +}
  14025. +
  14026. +static void __exit mptcp_olia_unregister(void)
  14027. +{
  14028. + tcp_unregister_congestion_control(&mptcp_olia);
  14029. +}
  14030. +
  14031. +module_init(mptcp_olia_register);
  14032. +module_exit(mptcp_olia_unregister);
  14033. +
  14034. +MODULE_AUTHOR("Ramin Khalili, Nicolas Gast, Jean-Yves Le Boudec");
  14035. +MODULE_LICENSE("GPL");
  14036. +MODULE_DESCRIPTION("MPTCP COUPLED CONGESTION CONTROL");
  14037. +MODULE_VERSION("0.1");
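For reference, the fixed-point arithmetic in mptcp_olia_cong_avoid() above corresponds, up to the scaling by 2^scale, to the per-ACK window update of the OLIA algorithm by Khalili et al.: for a subflow $r$ with congestion window $w_r$ and smoothed RTT $\mathrm{rtt}_r$,

    w_r \leftarrow w_r + \frac{w_r / \mathrm{rtt}_r^2}{\bigl(\sum_{p} w_p / \mathrm{rtt}_p\bigr)^2} + \frac{\varepsilon_r}{w_r}

where the sum runs over the subflows that can currently send, and $\varepsilon_r$ is $1/(\text{cnt\_established} \cdot |B \setminus M|)$, $-1/(\text{cnt\_established} \cdot |M|)$ or $0$ depending on whether $r$ has the best inter-loss distance without having the largest window, has the largest window, or neither; this is what epsilon_num/epsilon_den computed in mptcp_get_epsilon() encode, and mptcp_get_rate() returns $\mathrm{rtt}_r^2 (\sum_p w_p/\mathrm{rtt}_p)^2$, the denominator of the first term. This is a reading of the code above; the OLIA paper remains the authoritative reference.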
  14038. diff -Nur linux-3.14.45.orig/net/mptcp/mptcp_output.c linux-3.14.45/net/mptcp/mptcp_output.c
  14039. --- linux-3.14.45.orig/net/mptcp/mptcp_output.c 1970-01-01 01:00:00.000000000 +0100
  14040. +++ linux-3.14.45/net/mptcp/mptcp_output.c 2015-06-24 14:15:48.931862523 +0200
  14041. @@ -0,0 +1,2255 @@
  14042. +/*
  14043. + * MPTCP implementation - Sending side
  14044. + *
  14045. + * Initial Design & Implementation:
  14046. + * Sébastien Barré <sebastien.barre@uclouvain.be>
  14047. + *
  14048. + * Current Maintainer & Author:
  14049. + * Christoph Paasch <christoph.paasch@uclouvain.be>
  14050. + *
  14051. + * Additional authors:
  14052. + * Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
  14053. + * Gregory Detal <gregory.detal@uclouvain.be>
  14054. + * Fabien Duchêne <fabien.duchene@uclouvain.be>
  14055. + * Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
  14056. + * Lavkesh Lahngir <lavkesh51@gmail.com>
  14057. + * Andreas Ripke <ripke@neclab.eu>
  14058. + * Vlad Dogaru <vlad.dogaru@intel.com>
  14059. + * Octavian Purdila <octavian.purdila@intel.com>
  14060. + * John Ronan <jronan@tssg.org>
  14061. + * Catalin Nicutar <catalin.nicutar@gmail.com>
  14062. + * Brandon Heller <brandonh@stanford.edu>
  14063. + *
  14064. + *
  14065. + * This program is free software; you can redistribute it and/or
  14066. + * modify it under the terms of the GNU General Public License
  14067. + * as published by the Free Software Foundation; either version
  14068. + * 2 of the License, or (at your option) any later version.
  14069. + */
  14070. +
  14071. +#include <linux/kconfig.h>
  14072. +#include <linux/skbuff.h>
  14073. +#include <linux/tcp.h>
  14074. +
  14075. +#include <net/mptcp.h>
  14076. +#include <net/mptcp_v4.h>
  14077. +#include <net/mptcp_v6.h>
  14078. +#include <net/sock.h>
  14079. +
  14080. +static inline int mptcp_pi_to_flag(int pi)
  14081. +{
  14082. + return 1 << (pi - 1);
  14083. +}
  14084. +
  14085. +static inline int mptcp_sub_len_remove_addr(u16 bitfield)
  14086. +{
  14087. + unsigned int c;
  14088. + for (c = 0; bitfield; c++)
  14089. + bitfield &= bitfield - 1;
  14090. + return MPTCP_SUB_LEN_REMOVE_ADDR + c - 1;
  14091. +}
  14092. +
  14093. +int mptcp_sub_len_remove_addr_align(u16 bitfield)
  14094. +{
  14095. + return ALIGN(mptcp_sub_len_remove_addr(bitfield), 4);
  14096. +}
  14097. +EXPORT_SYMBOL(mptcp_sub_len_remove_addr_align);
  14098. +
  14099. +/* Is the sub-socket sk available to send the skb? */
  14100. +static int mptcp_is_available(struct sock *sk, struct sk_buff *skb,
  14101. + unsigned int *mss)
  14102. +{
  14103. + struct tcp_sock *tp = tcp_sk(sk);
  14104. + unsigned int mss_now;
  14105. +
  14106. + /* Set of states for which we are allowed to send data */
  14107. + if (!mptcp_sk_can_send(sk))
  14108. + return 0;
  14109. +
  14110. + /* We do not send data on this subflow unless it is
  14111. + * fully established, i.e. the 4th ack has been received.
  14112. + */
  14113. + if (tp->mptcp->pre_established)
  14114. + return 0;
  14115. +
  14116. + if (tp->pf ||
  14117. + (tp->mpcb->noneligible & mptcp_pi_to_flag(tp->mptcp->path_index)))
  14118. + return 0;
  14119. +
  14120. + if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) {
  14121. + /* If SACK is disabled, and we got a loss, TCP does not exit
  14122. + * the loss-state until something above high_seq has been acked.
  14123. + * (see tcp_try_undo_recovery)
  14124. + *
  14125. + * high_seq is the snd_nxt at the moment of the RTO. As soon
  14126. + * as we have an RTO, we won't push data on the subflow.
  14127. + * Thus, snd_una can never go beyond high_seq.
  14128. + */
  14129. + if (!tcp_is_reno(tp))
  14130. + return 0;
  14131. + else if (tp->snd_una != tp->high_seq)
  14132. + return 0;
  14133. + }
  14134. +
  14135. + if (!tp->mptcp->fully_established) {
  14136. + /* Make sure that we send in-order data */
  14137. + if (skb && tp->mptcp->second_packet &&
  14138. + tp->mptcp->last_end_data_seq != TCP_SKB_CB(skb)->seq)
  14139. + return 0;
  14140. + }
  14141. +
  14142. + if (!tcp_cwnd_test(tp, skb))
  14143. + return 0;
  14144. +
  14145. + mss_now = tcp_current_mss(sk);
  14146. + /* Don't send on this subflow if we bypass the allowed send-window at
  14147. + * the per-subflow level. Similar to tcp_snd_wnd_test, but manually
  14148. + * calculated end_seq (because here at this point end_seq is still at
  14149. + * the meta-level).
  14150. + */
  14151. + if (skb && after(tp->write_seq + min(skb->len, mss_now), tcp_wnd_end(tp)))
  14152. + return 0;
  14153. +
  14154. + if (mss)
  14155. + *mss = mss_now;
  14156. +
  14157. + return 1;
  14158. +}
  14159. +
  14160. +/* Are we not allowed to reinject this skb on tp? */
  14161. +static int mptcp_dont_reinject_skb(struct tcp_sock *tp, struct sk_buff *skb)
  14162. +{
  14163. + /* If the skb has already been enqueued in this sk, try to find
  14164. + * another one.
  14165. + */
  14166. + return skb &&
  14167. + /* Has the skb already been enqueued into this subsocket? */
  14168. + mptcp_pi_to_flag(tp->mptcp->path_index) & TCP_SKB_CB(skb)->path_mask;
  14169. +}
  14170. +
  14171. +/* This is the scheduler. It decides on which subflow to send a given MSS.
  14172. + * The flow is selected based on the shortest RTT. If all subflows are found
  14173. + * to be busy, or if all paths have full congestion windows, NULL is
  14174. + * returned.
  14175. + *
  14176. + * Additionally, this function is aware of the backup-subflows.
  14177. + */
  14178. +static struct sock *get_available_subflow(struct sock *meta_sk,
  14179. + struct sk_buff *skb,
  14180. + unsigned int *mss_now)
  14181. +{
  14182. + struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
  14183. + struct sock *sk, *bestsk = NULL, *lowpriosk = NULL, *backupsk = NULL;
  14184. + unsigned int mss = 0, mss_lowprio = 0, mss_backup = 0;
  14185. + u32 min_time_to_peer = 0xffffffff, lowprio_min_time_to_peer = 0xffffffff;
  14186. + int cnt_backups = 0;
  14187. +
  14188. + /* if there is only one subflow, bypass the scheduling function */
  14189. + if (mpcb->cnt_subflows == 1) {
  14190. + bestsk = (struct sock *)mpcb->connection_list;
  14191. + if (!mptcp_is_available(bestsk, skb, mss_now))
  14192. + bestsk = NULL;
  14193. + return bestsk;
  14194. + }
  14195. +
  14196. + /* Answer data_fin on same subflow!!! */
  14197. + if (meta_sk->sk_shutdown & RCV_SHUTDOWN &&
  14198. + skb && mptcp_is_data_fin(skb)) {
  14199. + mptcp_for_each_sk(mpcb, sk) {
  14200. + if (tcp_sk(sk)->mptcp->path_index == mpcb->dfin_path_index &&
  14201. + mptcp_is_available(sk, skb, mss_now))
  14202. + return sk;
  14203. + }
  14204. + }
  14205. +
  14206. + /* First, find the best subflow */
  14207. + mptcp_for_each_sk(mpcb, sk) {
  14208. + struct tcp_sock *tp = tcp_sk(sk);
  14209. + int this_mss;
  14210. +
  14211. + if (tp->mptcp->rcv_low_prio || tp->mptcp->low_prio)
  14212. + cnt_backups++;
  14213. +
  14214. + if ((tp->mptcp->rcv_low_prio || tp->mptcp->low_prio) &&
  14215. + tp->srtt < lowprio_min_time_to_peer) {
  14216. +
  14217. + if (!mptcp_is_available(sk, skb, &this_mss))
  14218. + continue;
  14219. +
  14220. + if (mptcp_dont_reinject_skb(tp, skb)) {
  14221. + mss_backup = this_mss;
  14222. + backupsk = sk;
  14223. + continue;
  14224. + }
  14225. +
  14226. + lowprio_min_time_to_peer = tp->srtt;
  14227. + lowpriosk = sk;
  14228. + mss_lowprio = this_mss;
  14229. + } else if (!(tp->mptcp->rcv_low_prio || tp->mptcp->low_prio) &&
  14230. + tp->srtt < min_time_to_peer) {
  14231. + if (!mptcp_is_available(sk, skb, &this_mss))
  14232. + continue;
  14233. +
  14234. + if (mptcp_dont_reinject_skb(tp, skb)) {
  14235. + mss_backup = this_mss;
  14236. + backupsk = sk;
  14237. + continue;
  14238. + }
  14239. +
  14240. + min_time_to_peer = tp->srtt;
  14241. + bestsk = sk;
  14242. + mss = this_mss;
  14243. + }
  14244. + }
  14245. +
  14246. + if (mpcb->cnt_established == cnt_backups && lowpriosk) {
  14247. + mss = mss_lowprio;
  14248. + sk = lowpriosk;
  14249. + } else if (bestsk) {
  14250. + sk = bestsk;
  14251. + } else if (backupsk) {
  14252. + /* It has been sent on all subflows once - let's give it a
  14253. + * chance again by restarting its pathmask.
  14254. + */
  14255. + if (skb)
  14256. + TCP_SKB_CB(skb)->path_mask = 0;
  14257. + mss = mss_backup;
  14258. + sk = backupsk;
  14259. + }
  14260. +
  14261. + if (mss_now)
  14262. + *mss_now = mss;
  14263. +
  14264. + return sk;
  14265. +}
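In short, get_available_subflow() picks the usable subflow with the smallest smoothed RTT, considers backup subflows (low_prio/rcv_low_prio) only when every established subflow is a backup, and falls back to a subflow that already carries the skb only as a last resort, resetting its path_mask. A stripped-down, stand-alone sketch of that selection rule, with invented field names and without the data_fin and reinjection corner cases, might read:

#include <stdio.h>

struct subflow {
	unsigned int srtt;	/* smoothed RTT */
	int usable;		/* would pass mptcp_is_available() */
	int backup;		/* low_prio || rcv_low_prio */
};

static int pick_subflow(const struct subflow *sf, int n)
{
	int best = -1, best_backup = -1;

	for (int i = 0; i < n; i++) {
		if (!sf[i].usable)
			continue;
		if (!sf[i].backup) {
			if (best < 0 || sf[i].srtt < sf[best].srtt)
				best = i;
		} else {
			if (best_backup < 0 || sf[i].srtt < sf[best_backup].srtt)
				best_backup = i;
		}
	}
	/* Backups are used only when no regular subflow is available. */
	return best >= 0 ? best : best_backup;
}

int main(void)
{
	struct subflow sf[] = {
		{ .srtt = 40, .usable = 1, .backup = 0 },
		{ .srtt = 10, .usable = 1, .backup = 1 },	/* fast, but backup */
		{ .srtt = 25, .usable = 1, .backup = 0 },
	};

	printf("chosen subflow: %d\n", pick_subflow(sf, 3));	/* prints 2 */
	return 0;
}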
  14266. +
  14267. +static struct mp_dss *mptcp_skb_find_dss(const struct sk_buff *skb)
  14268. +{
  14269. + if (!mptcp_is_data_seq(skb))
  14270. + return NULL;
  14271. +
  14272. + return (struct mp_dss *)(skb->data - (MPTCP_SUB_LEN_DSS_ALIGN +
  14273. + MPTCP_SUB_LEN_ACK_ALIGN +
  14274. + MPTCP_SUB_LEN_SEQ_ALIGN));
  14275. +}
  14276. +
  14277. +/* get the data-seq and end-data-seq and store them again in the
  14278. + * tcp_skb_cb
  14279. + */
  14280. +static int mptcp_reconstruct_mapping(struct sk_buff *skb, struct sk_buff *orig_skb)
  14281. +{
  14282. + struct mp_dss *mpdss = mptcp_skb_find_dss(orig_skb);
  14283. + u32 *p32;
  14284. + u16 *p16;
  14285. +
  14286. + if (!mpdss || !mpdss->M)
  14287. + return 1;
  14288. +
  14289. + /* Move the pointer to the data-seq */
  14290. + p32 = (u32 *)mpdss;
  14291. + p32++;
  14292. + if (mpdss->A) {
  14293. + p32++;
  14294. + if (mpdss->a)
  14295. + p32++;
  14296. + }
  14297. +
  14298. + TCP_SKB_CB(skb)->seq = ntohl(*p32);
  14299. +
  14300. + /* Get the data_len to calculate the end_data_seq */
  14301. + p32++;
  14302. + p32++;
  14303. + p16 = (u16 *)p32;
  14304. + TCP_SKB_CB(skb)->end_seq = ntohs(*p16) + TCP_SKB_CB(skb)->seq;
  14305. +
  14306. + return 0;
  14307. +}
  14308. +
  14309. +/* Similar to __pskb_copy and sk_stream_alloc_skb. */
  14310. +static struct sk_buff *mptcp_pskb_copy(struct sk_buff *skb)
  14311. +{
  14312. + struct sk_buff *n;
  14313. + /* The TCP header must be at least 32-bit aligned. */
  14314. + int size = ALIGN(skb_headlen(skb), 4);
  14315. +
  14316. + n = alloc_skb_fclone(size + MAX_TCP_HEADER, GFP_ATOMIC);
  14317. + if (!n)
  14318. + return NULL;
  14319. +
  14320. + /* Set the data pointer */
  14321. + skb_reserve(n, MAX_TCP_HEADER);
  14322. + /* Set the tail pointer and length */
  14323. + skb_put(n, skb_headlen(skb));
  14324. + /* Copy the bytes */
  14325. + skb_copy_from_linear_data(skb, n->data, n->len);
  14326. +
  14327. + n->truesize += skb->data_len;
  14328. + n->data_len = skb->data_len;
  14329. + n->len = skb->len;
  14330. +
  14331. + if (skb_shinfo(skb)->nr_frags) {
  14332. + int i;
  14333. +
  14334. + if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
  14335. + if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
  14336. + kfree_skb(n);
  14337. + n = NULL;
  14338. + goto out;
  14339. + }
  14340. + }
  14341. + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
  14342. + skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
  14343. + skb_frag_ref(skb, i);
  14344. + }
  14345. + skb_shinfo(n)->nr_frags = i;
  14346. + }
  14347. +
  14348. + if (skb_has_frag_list(skb)) {
  14349. + skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
  14350. + skb_clone_fraglist(n);
  14351. + }
  14352. +
  14353. + copy_skb_header(n, skb);
  14354. +out:
  14355. + return n;
  14356. +}
  14357. +
  14358. +/* Reinject data from one TCP subflow to the meta_sk. If sk == NULL, we are
  14359. + * coming from the meta-retransmit-timer
  14360. + */
  14361. +static void __mptcp_reinject_data(struct sk_buff *orig_skb, struct sock *meta_sk,
  14362. + struct sock *sk, int clone_it)
  14363. +{
  14364. + struct sk_buff *skb, *skb1;
  14365. + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
  14366. + struct mptcp_cb *mpcb = meta_tp->mpcb;
  14367. + u32 seq, end_seq;
  14368. +
  14369. + if (clone_it) {
  14370. + /* pskb_copy is necessary here, because the TCP/IP-headers
  14371. + * will be changed when it's going to be reinjected on another
  14372. + * subflow.
  14373. + */
  14374. + skb = mptcp_pskb_copy(orig_skb);
  14375. + } else {
  14376. + __skb_unlink(orig_skb, &sk->sk_write_queue);
  14377. + sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
  14378. + sk->sk_wmem_queued -= orig_skb->truesize;
  14379. + sk_mem_uncharge(sk, orig_skb->truesize);
  14380. + skb = orig_skb;
  14381. + }
  14382. + if (unlikely(!skb))
  14383. + return;
  14384. +
  14385. + if (sk && mptcp_reconstruct_mapping(skb, orig_skb)) {
  14386. + __kfree_skb(skb);
  14387. + return;
  14388. + }
  14389. +
  14390. + skb->sk = meta_sk;
  14391. +
  14392. + /* If it already reached the destination, we don't have to reinject it */
  14393. + if (!after(TCP_SKB_CB(skb)->end_seq, meta_tp->snd_una)) {
  14394. + __kfree_skb(skb);
  14395. + return;
  14396. + }
  14397. +
  14398. + /* Only reinject segments that are fully covered by the mapping */
  14399. + if (skb->len + (mptcp_is_data_fin(skb) ? 1 : 0) !=
  14400. + TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq) {
  14401. + u32 seq = TCP_SKB_CB(skb)->seq;
  14402. + u32 end_seq = TCP_SKB_CB(skb)->end_seq;
  14403. +
  14404. + __kfree_skb(skb);
  14405. +
  14406. + /* Ok, now we have to look for the full mapping in the meta
  14407. + * send-queue :S
  14408. + */
  14409. + tcp_for_write_queue(skb, meta_sk) {
  14410. + /* Not yet at the mapping? */
  14411. + if (before(TCP_SKB_CB(skb)->seq, seq))
  14412. + continue;
  14413. + /* We have passed by the mapping */
  14414. + if (after(TCP_SKB_CB(skb)->end_seq, end_seq))
  14415. + return;
  14416. +
  14417. + __mptcp_reinject_data(skb, meta_sk, NULL, 1);
  14418. + }
  14419. + return;
  14420. + }
  14421. +
  14422. + /* If it's empty, just add */
  14423. + if (skb_queue_empty(&mpcb->reinject_queue)) {
  14424. + skb_queue_head(&mpcb->reinject_queue, skb);
  14425. + return;
  14426. + }
  14427. +
  14428. + /* Find the place to insert skb - or we may even 'drop' it, if the
  14429. + * data is already covered by other skbs in the reinject-queue.
  14430. + *
  14431. + * This is inspired by code from tcp_data_queue.
  14432. + */
  14433. +
  14434. + skb1 = skb_peek_tail(&mpcb->reinject_queue);
  14435. + seq = TCP_SKB_CB(skb)->seq;
  14436. + while (1) {
  14437. + if (!after(TCP_SKB_CB(skb1)->seq, seq))
  14438. + break;
  14439. + if (skb_queue_is_first(&mpcb->reinject_queue, skb1)) {
  14440. + skb1 = NULL;
  14441. + break;
  14442. + }
  14443. + skb1 = skb_queue_prev(&mpcb->reinject_queue, skb1);
  14444. + }
  14445. +
  14446. + /* Does skb overlap the previous one? */
  14447. + end_seq = TCP_SKB_CB(skb)->end_seq;
  14448. + if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
  14449. + if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
  14450. + /* All the bits are present. Don't reinject */
  14451. + __kfree_skb(skb);
  14452. + return;
  14453. + }
  14454. + if (seq == TCP_SKB_CB(skb1)->seq) {
  14455. + if (skb_queue_is_first(&mpcb->reinject_queue, skb1))
  14456. + skb1 = NULL;
  14457. + else
  14458. + skb1 = skb_queue_prev(&mpcb->reinject_queue, skb1);
  14459. + }
  14460. + }
  14461. + if (!skb1)
  14462. + __skb_queue_head(&mpcb->reinject_queue, skb);
  14463. + else
  14464. + __skb_queue_after(&mpcb->reinject_queue, skb1, skb);
  14465. +
  14466. + /* And clean segments covered by new one as whole. */
  14467. + while (!skb_queue_is_last(&mpcb->reinject_queue, skb)) {
  14468. + skb1 = skb_queue_next(&mpcb->reinject_queue, skb);
  14469. +
  14470. + if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
  14471. + break;
  14472. +
  14473. + __skb_unlink(skb1, &mpcb->reinject_queue);
  14474. + __kfree_skb(skb1);
  14475. + }
  14476. + return;
  14477. +}
  14478. +
  14479. +/* Inserts data into the reinject queue */
  14480. +void mptcp_reinject_data(struct sock *sk, int clone_it)
  14481. +{
  14482. + struct sk_buff *skb_it, *tmp;
  14483. + struct tcp_sock *tp = tcp_sk(sk);
  14484. + struct sock *meta_sk = tp->meta_sk;
  14485. +
  14486. + /* It has already been closed - there is really no point in reinjecting */
  14487. + if (meta_sk->sk_state == TCP_CLOSE)
  14488. + return;
  14489. +
  14490. + skb_queue_walk_safe(&sk->sk_write_queue, skb_it, tmp) {
  14491. + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb_it);
  14492. + /* Subflow syn's and fin's are not reinjected.
  14493. + *
  14494. + * As well as empty subflow-fins with a data-fin.
  14495. + * They are reinjected below (without the subflow-fin-flag)
  14496. + */
  14497. + if (tcb->tcp_flags & TCPHDR_SYN ||
  14498. + (tcb->tcp_flags & TCPHDR_FIN && !mptcp_is_data_fin(skb_it)) ||
  14499. + (tcb->tcp_flags & TCPHDR_FIN && mptcp_is_data_fin(skb_it) && !skb_it->len))
  14500. + continue;
  14501. +
  14502. + __mptcp_reinject_data(skb_it, meta_sk, sk, clone_it);
  14503. + }
  14504. +
  14505. + skb_it = tcp_write_queue_tail(meta_sk);
  14506. + /* If sk has sent the empty data-fin, we have to reinject it too. */
  14507. + if (skb_it && mptcp_is_data_fin(skb_it) && skb_it->len == 0 &&
  14508. + TCP_SKB_CB(skb_it)->path_mask & mptcp_pi_to_flag(tp->mptcp->path_index)) {
  14509. + __mptcp_reinject_data(skb_it, meta_sk, NULL, 1);
  14510. + }
  14511. +
  14512. + mptcp_push_pending_frames(meta_sk);
  14513. +
  14514. + tp->pf = 1;
  14515. +}
  14516. +EXPORT_SYMBOL(mptcp_reinject_data);
  14517. +
  14518. +static void mptcp_combine_dfin(struct sk_buff *skb, struct sock *meta_sk,
  14519. + struct sock *subsk)
  14520. +{
  14521. + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
  14522. + struct mptcp_cb *mpcb = meta_tp->mpcb;
  14523. + struct sock *sk_it;
  14524. + int all_empty = 1, all_acked;
  14525. +
  14526. + /* In infinite mapping we always try to combine */
  14527. + if (mpcb->infinite_mapping_snd && tcp_close_state(subsk)) {
  14528. + subsk->sk_shutdown |= SEND_SHUTDOWN;
  14529. + TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN;
  14530. + return;
  14531. + }
  14532. +
  14533. + /* Don't combine, if they didn't combine - otherwise we end up in
  14534. + * TIME_WAIT, even if our app is smart enough to avoid it
  14535. + */
  14536. + if (meta_sk->sk_shutdown & RCV_SHUTDOWN) {
  14537. + if (!mpcb->dfin_combined)
  14538. + return;
  14539. + }
  14540. +
  14541. + /* If no other subflow has data to send, we can combine */
  14542. + mptcp_for_each_sk(mpcb, sk_it) {
  14543. + if (!mptcp_sk_can_send(sk_it))
  14544. + continue;
  14545. +
  14546. + if (!tcp_write_queue_empty(sk_it))
  14547. + all_empty = 0;
  14548. + }
  14549. +
  14550. + /* If all data has been DATA_ACKed, we can combine.
  14551. + * -1, because the data_fin consumed one byte
  14552. + */
  14553. + all_acked = (meta_tp->snd_una == (meta_tp->write_seq - 1));
  14554. +
  14555. + if ((all_empty || all_acked) && tcp_close_state(subsk)) {
  14556. + subsk->sk_shutdown |= SEND_SHUTDOWN;
  14557. + TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN;
  14558. + }
  14559. +}
  14560. +
  14561. +static struct sk_buff *mptcp_skb_entail(struct sock *sk, struct sk_buff *skb,
  14562. + int reinject)
  14563. +{
  14564. + __be32 *ptr;
  14565. + __u16 data_len;
  14566. + struct mp_dss *mdss;
  14567. + struct tcp_sock *tp = tcp_sk(sk);
  14568. + struct sock *meta_sk = mptcp_meta_sk(sk);
  14569. + struct mptcp_cb *mpcb = tp->mpcb;
  14570. + struct tcp_skb_cb *tcb;
  14571. + struct sk_buff *subskb = NULL;
  14572. +
  14573. + if (!reinject)
  14574. + TCP_SKB_CB(skb)->mptcp_flags |= (mpcb->snd_hiseq_index ?
  14575. + MPTCPHDR_SEQ64_INDEX : 0);
  14576. +
  14577. + subskb = mptcp_pskb_copy(skb);
  14578. + if (!subskb)
  14579. + return NULL;
  14580. +
  14581. + TCP_SKB_CB(skb)->path_mask |= mptcp_pi_to_flag(tp->mptcp->path_index);
  14582. +
  14583. + if (!(sk->sk_route_caps & NETIF_F_ALL_CSUM) &&
  14584. + skb->ip_summed == CHECKSUM_PARTIAL) {
  14585. + subskb->csum = skb->csum = skb_checksum(skb, 0, skb->len, 0);
  14586. + subskb->ip_summed = skb->ip_summed = CHECKSUM_NONE;
  14587. + }
  14588. +
  14589. + /* The subskb is going in the subflow send-queue. Its path-mask
  14590. + * is not needed anymore and MUST be set to 0, as the path-mask
  14591. + * is a union with inet_skb_param.
  14592. + */
  14593. + tcb = TCP_SKB_CB(subskb);
  14594. + tcb->path_mask = 0;
  14595. +
  14596. + if (mptcp_is_data_fin(subskb))
  14597. + mptcp_combine_dfin(subskb, meta_sk, sk);
  14598. +
  14599. + if (tp->mpcb->infinite_mapping_snd)
  14600. + goto no_data_seq;
  14601. +
  14602. + if (tp->mpcb->send_infinite_mapping &&
  14603. + !before(tcb->seq, mptcp_meta_tp(tp)->snd_nxt)) {
  14604. + tp->mptcp->fully_established = 1;
  14605. + tp->mpcb->infinite_mapping_snd = 1;
  14606. + tp->mptcp->infinite_cutoff_seq = tp->write_seq;
  14607. + tcb->mptcp_flags |= MPTCPHDR_INF;
  14608. + data_len = 0;
  14609. + } else {
  14610. + data_len = tcb->end_seq - tcb->seq;
  14611. + }
  14612. +
  14613. + /**** Write MPTCP DSS-option to the packet. ****/
  14614. + ptr = (__be32 *)(subskb->data - (MPTCP_SUB_LEN_DSS_ALIGN +
  14615. + MPTCP_SUB_LEN_ACK_ALIGN +
  14616. + MPTCP_SUB_LEN_SEQ_ALIGN));
  14617. +
  14618. + /* Then we start writing it from the start */
  14619. + mdss = (struct mp_dss *)ptr;
  14620. +
  14621. + mdss->kind = TCPOPT_MPTCP;
  14622. + mdss->sub = MPTCP_SUB_DSS;
  14623. + mdss->rsv1 = 0;
  14624. + mdss->rsv2 = 0;
  14625. + mdss->F = (mptcp_is_data_fin(subskb) ? 1 : 0);
  14626. + mdss->m = 0;
  14627. + mdss->M = 1;
  14628. + mdss->a = 0;
  14629. + mdss->A = 1;
  14630. + mdss->len = mptcp_sub_len_dss(mdss, tp->mpcb->dss_csum);
  14631. +
  14632. + ptr++;
  14633. + ptr++; /* data_ack will be set in mptcp_options_write */
  14634. + *ptr++ = htonl(tcb->seq); /* data_seq */
  14635. +
  14636. + /* If it's a non-data DATA_FIN, we set subseq to 0 (draft v7) */
  14637. + if (mptcp_is_data_fin(subskb) && subskb->len == 0)
  14638. + *ptr++ = 0; /* subseq */
  14639. + else
  14640. + *ptr++ = htonl(tp->write_seq - tp->mptcp->snt_isn); /* subseq */
  14641. +
  14642. + if (tp->mpcb->dss_csum && data_len) {
  14643. + __be16 *p16 = (__be16 *)ptr;
  14644. + __be32 hdseq = mptcp_get_highorder_sndbits(subskb, tp->mpcb);
  14645. + __wsum csum;
  14646. + *ptr = htonl(((data_len) << 16) |
  14647. + (TCPOPT_EOL << 8) |
  14648. + (TCPOPT_EOL));
  14649. +
  14650. + csum = csum_partial(ptr - 2, 12, subskb->csum);
  14651. + p16++;
  14652. + *p16++ = csum_fold(csum_partial(&hdseq, sizeof(hdseq), csum));
  14653. + } else {
  14654. + *ptr++ = htonl(((data_len) << 16) |
  14655. + (TCPOPT_NOP << 8) |
  14656. + (TCPOPT_NOP));
  14657. + }
  14658. +
  14659. +no_data_seq:
  14660. + tcb->seq = tp->write_seq;
  14661. + tcb->sacked = 0; /* reset the sacked field: from the point of view
  14662. + * of this subflow, we are sending a brand new
  14663. + * segment */
  14664. + /* Take into account seg len */
  14665. + tp->write_seq += subskb->len + ((tcb->tcp_flags & TCPHDR_FIN) ? 1 : 0);
  14666. + tcb->end_seq = tp->write_seq;
  14667. +
  14668. + /* If it's a non-payload DATA_FIN (also no subflow-fin), the
  14669. + * segment is not part of the subflow but on a meta-only-level
  14670. + */
  14671. + if (!mptcp_is_data_fin(subskb) || tcb->end_seq != tcb->seq) {
  14672. + tcp_add_write_queue_tail(sk, subskb);
  14673. + sk->sk_wmem_queued += subskb->truesize;
  14674. + sk_mem_charge(sk, subskb->truesize);
  14675. + }
  14676. +
  14677. + return subskb;
  14678. +}
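The block above writes the DSS (Data Sequence Signal) option of RFC 6824: the 4-byte header with the F/m/M/a/A flags, space for the data ACK (filled in later, in mptcp_options_write as the comment above notes), the data-level sequence number, the subflow-relative sequence number, the data-level length, and either the DSS checksum or two NOP paddings. A hedged C view of those bytes for the case generated here (32-bit data ACK because mdss->a == 0, 32-bit data sequence number because mdss->m == 0) is sketched below; the struct is illustrative only, not a type from the patch.

#include <stdint.h>

struct dss_opt_v4 {			/* illustrative view of the option bytes */
	uint8_t  kind;			/* TCPOPT_MPTCP (30) */
	uint8_t  len;			/* as computed by mptcp_sub_len_dss() */
	uint8_t  sub_rsv;		/* DSS subtype (4 bits) + reserved bits */
	uint8_t  flags;			/* reserved bits + F|m|M|a|A */
	uint32_t data_ack;		/* written later by the option code */
	uint32_t data_seq;		/* meta-level sequence (tcb->seq above) */
	uint32_t subflow_seq;		/* relative to the subflow's initial seq */
	uint16_t data_len;		/* data-level length */
	uint16_t csum;			/* DSS checksum, present only when
					 * dss_csum is set; otherwise two
					 * TCPOPT_NOPs pad the option instead */
} __attribute__((packed));

_Static_assert(sizeof(struct dss_opt_v4) == 20, "DSS with 4-byte ack and seq");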
  14679. +
  14680. +static void mptcp_sub_event_new_data_sent(struct sock *sk,
  14681. + struct sk_buff *subskb,
  14682. + struct sk_buff *skb)
  14683. +{
  14684. + /* If it's a non-payload DATA_FIN (also no subflow-fin), the
  14685. + * segment is not part of the subflow but on a meta-only-level
  14686. + *
  14687. + * We free it, because it has been queued nowhere.
  14688. + */
  14689. + if (!mptcp_is_data_fin(subskb) ||
  14690. + (TCP_SKB_CB(subskb)->end_seq != TCP_SKB_CB(subskb)->seq)) {
  14691. + tcp_event_new_data_sent(sk, subskb);
  14692. + tcp_sk(sk)->mptcp->second_packet = 1;
  14693. + tcp_sk(sk)->mptcp->last_end_data_seq = TCP_SKB_CB(skb)->end_seq;
  14694. + } else {
  14695. + kfree_skb(subskb);
  14696. + }
  14697. +}
  14698. +
  14699. +/* Handle the packets and sockets after a tcp_transmit_skb failed */
  14700. +static void mptcp_transmit_skb_failed(struct sock *sk, struct sk_buff *skb,
  14701. + struct sk_buff *subskb)
  14702. +{
  14703. + struct tcp_sock *tp = tcp_sk(sk);
  14704. + struct mptcp_cb *mpcb = tp->mpcb;
  14705. +
  14706. + /* No work to do if we are in infinite mapping mode:
  14707. + * there is only one subflow left and we cannot send this segment on
  14708. + * another subflow.
  14709. + */
  14710. + if (mpcb->infinite_mapping_snd)
  14711. + return;
  14712. +
  14713. + TCP_SKB_CB(skb)->path_mask &= ~mptcp_pi_to_flag(tp->mptcp->path_index);
  14714. +
  14715. + if (TCP_SKB_CB(subskb)->tcp_flags & TCPHDR_FIN) {
  14716. + /* If it is a subflow-fin we must leave it on the
  14717. + * subflow-send-queue, so that the probe-timer
  14718. + * can retransmit it.
  14719. + */
  14720. + if (!tp->packets_out && !inet_csk(sk)->icsk_pending)
  14721. + inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
  14722. + inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
  14723. + } else if (mptcp_is_data_fin(subskb) &&
  14724. + TCP_SKB_CB(subskb)->end_seq == TCP_SKB_CB(subskb)->seq) {
  14725. + /* An empty data-fin has not been enqueued on the subflow
  14726. + * and thus we free it.
  14727. + */
  14728. +
  14729. + kfree_skb(subskb);
  14730. + } else {
  14731. + /* In all other cases we remove it from the sub-queue.
  14732. + * Other subflows may send it, or the probe-timer will
  14733. + * handle it.
  14734. + */
  14735. + tcp_advance_send_head(sk, subskb);
  14736. +
  14737. + /* tcp_add_write_queue_tail initialized highest_sack. We have
  14738. + * to reset it, if necessary.
  14739. + */
  14740. + if (tp->highest_sack == subskb)
  14741. + tp->highest_sack = NULL;
  14742. +
  14743. + tcp_unlink_write_queue(subskb, sk);
  14744. + tp->write_seq -= subskb->len;
  14745. + sk_wmem_free_skb(sk, subskb);
  14746. + }
  14747. +}
  14748. +
  14749. +/* Function to create two new TCP segments. Shrinks the given segment
  14750. + * to the specified size and appends a new segment with the rest of the
  14751. + * packet to the list. This won't be called frequently, I hope.
  14752. + * Remember, these are still headerless SKBs at this point.
  14753. + */
  14754. +int mptcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
  14755. + unsigned int mss_now, int reinject)
  14756. +{
  14757. + struct tcp_sock *tp = tcp_sk(sk);
  14758. + struct sk_buff *buff;
  14759. + int nsize, old_factor;
  14760. + int nlen;
  14761. + u8 flags;
  14762. + int dsslen = MPTCP_SUB_LEN_DSS_ALIGN + MPTCP_SUB_LEN_ACK_ALIGN +
  14763. + MPTCP_SUB_LEN_SEQ_ALIGN;
  14764. + char dss[MPTCP_SUB_LEN_DSS_ALIGN + MPTCP_SUB_LEN_ACK_ALIGN +
  14765. + MPTCP_SUB_LEN_SEQ_ALIGN];
  14766. +
  14767. + if (WARN_ON(len > skb->len))
  14768. + return -EINVAL;
  14769. +
  14770. + /* DSS-option must be recovered afterwards. */
  14771. + if (!is_meta_sk(sk))
  14772. + memcpy(dss, skb->data - dsslen, dsslen);
  14773. +
  14774. + nsize = skb_headlen(skb) - len;
  14775. + if (nsize < 0)
  14776. + nsize = 0;
  14777. +
  14778. + if (skb_cloned(skb)) {
  14779. + if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
  14780. + return -ENOMEM;
  14781. + /* Recover dss-option */
  14782. + if (!is_meta_sk(sk))
  14783. + memcpy(skb->data - dsslen, dss, dsslen);
  14784. + }
  14785. +
  14786. + /* Get a new skb... force flag on. */
  14787. + buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC);
  14788. + if (buff == NULL)
  14789. + return -ENOMEM; /* We'll just try again later. */
  14790. +
  14791. + /* See below - if reinject == 1, the buff will be added to the reinject-
  14792. + * queue, which is currently not part of the memory-accounting.
  14793. + */
  14794. + if (reinject != 1) {
  14795. + sk->sk_wmem_queued += buff->truesize;
  14796. + sk_mem_charge(sk, buff->truesize);
  14797. + }
  14798. + nlen = skb->len - len - nsize;
  14799. + buff->truesize += nlen;
  14800. + skb->truesize -= nlen;
  14801. +
  14802. + /* Correct the sequence numbers. */
  14803. + TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
  14804. + TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
  14805. + TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
  14806. +
  14807. + /* PSH and FIN should only be set in the second packet. */
  14808. + flags = TCP_SKB_CB(skb)->tcp_flags;
  14809. + TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
  14810. + TCP_SKB_CB(buff)->tcp_flags = flags;
  14811. + TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
  14812. +
  14813. + flags = TCP_SKB_CB(skb)->mptcp_flags;
  14814. + TCP_SKB_CB(skb)->mptcp_flags = flags & ~(MPTCPHDR_FIN);
  14815. + TCP_SKB_CB(buff)->mptcp_flags = flags;
  14816. +
  14817. + if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
  14818. + /* Copy and checksum data tail into the new buffer. */
  14819. + buff->csum = csum_partial_copy_nocheck(skb->data + len,
  14820. + skb_put(buff, nsize),
  14821. + nsize, 0);
  14822. +
  14823. + skb_trim(skb, len);
  14824. +
  14825. + skb->csum = csum_block_sub(skb->csum, buff->csum, len);
  14826. + } else {
  14827. + skb->ip_summed = CHECKSUM_PARTIAL;
  14828. + skb_split(skb, buff, len);
  14829. + }
  14830. +
  14831. + /* We lost the dss-option when creating buff - put it back! */
  14832. + if (!is_meta_sk(sk))
  14833. + memcpy(buff->data - dsslen, dss, dsslen);
  14834. +
  14835. + buff->ip_summed = skb->ip_summed;
  14836. +
  14837. + /* Looks stupid, but our code really uses when of
  14838. + * skbs, which it never sent before. --ANK
  14839. + */
  14840. + TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when;
  14841. + buff->tstamp = skb->tstamp;
  14842. +
  14843. + old_factor = tcp_skb_pcount(skb);
  14844. +
  14845. + /* Fix up tso_factor for both original and new SKB. */
  14846. + tcp_set_skb_tso_segs(sk, skb, mss_now);
  14847. + tcp_set_skb_tso_segs(sk, buff, mss_now);
  14848. +
  14849. + /* If this packet has been sent out already, we must
  14850. + * adjust the various packet counters.
  14851. + */
  14852. + if (!before(tp->snd_nxt, TCP_SKB_CB(buff)->end_seq) && reinject != 1) {
  14853. + int diff = old_factor - tcp_skb_pcount(skb) -
  14854. + tcp_skb_pcount(buff);
  14855. +
  14856. + if (diff)
  14857. + tcp_adjust_pcount(sk, skb, diff);
  14858. + }
  14859. +
  14860. + /* Link BUFF into the send queue. */
  14861. + skb_header_release(buff);
  14862. + if (reinject == 1)
  14863. + __skb_queue_after(&tcp_sk(sk)->mpcb->reinject_queue, skb, buff);
  14864. + else
  14865. + tcp_insert_write_queue_after(skb, buff, sk);
  14866. +
  14867. + return 0;
  14868. +}
  14869. +
  14870. +int mptso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
  14871. + unsigned int mss_now, gfp_t gfp, int reinject)
  14872. +{
  14873. + struct sk_buff *buff;
  14874. + int nlen = skb->len - len, old_factor;
  14875. + u8 flags;
  14876. + int dsslen = MPTCP_SUB_LEN_DSS_ALIGN + MPTCP_SUB_LEN_ACK_ALIGN +
  14877. + MPTCP_SUB_LEN_SEQ_ALIGN;
  14878. +
  14879. + /* All of a TSO frame must be composed of paged data. */
  14880. + if (skb->len != skb->data_len)
  14881. + return mptcp_fragment(sk, skb, len, mss_now, reinject);
  14882. +
  14883. + buff = sk_stream_alloc_skb(sk, 0, gfp);
  14884. + if (unlikely(buff == NULL))
  14885. + return -ENOMEM;
  14886. +
  14887. + /* See below - if reinject == 1, the buff will be added to the reinject-
  14888. + * queue, which is currently not part of the memory-accounting.
  14889. + */
  14890. + if (reinject != 1) {
  14891. + sk->sk_wmem_queued += buff->truesize;
  14892. + sk_mem_charge(sk, buff->truesize);
  14893. + }
  14894. + buff->truesize += nlen;
  14895. + skb->truesize -= nlen;
  14896. +
  14897. + /* Correct the sequence numbers. */
  14898. + TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
  14899. + TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
  14900. + TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
  14901. +
  14902. + /* PSH and FIN should only be set in the second packet. */
  14903. + flags = TCP_SKB_CB(skb)->tcp_flags;
  14904. + TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
  14905. + TCP_SKB_CB(buff)->tcp_flags = flags;
  14906. +
  14907. + flags = TCP_SKB_CB(skb)->mptcp_flags;
  14908. + TCP_SKB_CB(skb)->mptcp_flags = flags & ~(MPTCPHDR_FIN);
  14909. + TCP_SKB_CB(buff)->mptcp_flags = flags;
  14910. +
  14911. + /* This packet was never sent out yet, so no SACK bits. */
  14912. + TCP_SKB_CB(buff)->sacked = 0;
  14913. +
  14914. + buff->ip_summed = CHECKSUM_PARTIAL;
  14915. + skb->ip_summed = CHECKSUM_PARTIAL;
  14916. + skb_split(skb, buff, len);
  14917. +
  14918. + /* We lost the dss-option when creating buff - put it back! */
  14919. + if (!is_meta_sk(sk))
  14920. + memcpy(buff->data - dsslen, skb->data - dsslen, dsslen);
  14921. +
  14922. + old_factor = tcp_skb_pcount(skb);
  14923. +
  14924. + /* Fix up tso_factor for both original and new SKB. */
  14925. + tcp_set_skb_tso_segs(sk, skb, mss_now);
  14926. + tcp_set_skb_tso_segs(sk, buff, mss_now);
  14927. +
  14928. + /* If this packet has been sent out already, we must
  14929. + * adjust the various packet counters.
  14930. + */
  14931. + if (!before(tcp_sk(sk)->snd_nxt, TCP_SKB_CB(buff)->end_seq) && reinject != 1) {
  14932. + int diff = old_factor - tcp_skb_pcount(skb) -
  14933. + tcp_skb_pcount(buff);
  14934. +
  14935. + if (diff)
  14936. + tcp_adjust_pcount(sk, skb, diff);
  14937. + }
  14938. +
  14939. + /* Link BUFF into the send queue. */
  14940. + skb_header_release(buff);
  14941. + if (reinject == 1)
  14942. + __skb_queue_after(&tcp_sk(sk)->mpcb->reinject_queue, skb, buff);
  14943. + else
  14944. + tcp_insert_write_queue_after(skb, buff, sk);
  14945. +
  14946. + return 0;
  14947. +}
  14948. +
  14949. +/* Inspired by tcp_write_wakeup */
  14950. +int mptcp_write_wakeup(struct sock *meta_sk)
  14951. +{
  14952. + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
  14953. + struct sk_buff *skb, *subskb;
  14954. +
  14955. + skb = tcp_send_head(meta_sk);
  14956. + if (skb &&
  14957. + before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(meta_tp))) {
  14958. + int err;
  14959. + unsigned int mss;
  14960. + unsigned int seg_size = tcp_wnd_end(meta_tp) - TCP_SKB_CB(skb)->seq;
  14961. + struct sock *subsk = get_available_subflow(meta_sk, skb, &mss);
  14962. + if (!subsk)
  14963. + return -1;
  14964. +
  14965. + if (before(meta_tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
  14966. + meta_tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;
  14967. +
  14968. + /* We are probing the opening of a window
  14969. + * but the window size is != 0
  14970. + * must have been a result of SWS avoidance (sender)
  14971. + */
  14972. + if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
  14973. + skb->len > mss) {
  14974. + seg_size = min(seg_size, mss);
  14975. + TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
  14976. + if (mptcp_fragment(meta_sk, skb, seg_size, mss, 0))
  14977. + return -1;
  14978. + } else if (!tcp_skb_pcount(skb)) {
  14979. + tcp_set_skb_tso_segs(meta_sk, skb, mss);
  14980. + }
  14981. +
  14982. + subskb = mptcp_skb_entail(subsk, skb, 0);
  14983. + if (!subskb)
  14984. + return -1;
  14985. +
  14986. + TCP_SKB_CB(subskb)->tcp_flags |= TCPHDR_PSH;
  14987. + TCP_SKB_CB(skb)->when = tcp_time_stamp;
  14988. + TCP_SKB_CB(subskb)->when = tcp_time_stamp;
  14989. + err = tcp_transmit_skb(subsk, subskb, 1, GFP_ATOMIC);
  14990. + if (unlikely(err)) {
  14991. + mptcp_transmit_skb_failed(subsk, skb, subskb);
  14992. + return err;
  14993. + }
  14994. +
  14995. + mptcp_check_sndseq_wrap(meta_tp, TCP_SKB_CB(skb)->end_seq -
  14996. + TCP_SKB_CB(skb)->seq);
  14997. + tcp_event_new_data_sent(meta_sk, skb);
  14998. + mptcp_sub_event_new_data_sent(subsk, subskb, skb);
  14999. +
  15000. + return 0;
  15001. + } else {
  15002. + struct sock *sk_it;
  15003. + int ans = 0;
  15004. +
  15005. + if (between(meta_tp->snd_up, meta_tp->snd_una + 1,
  15006. + meta_tp->snd_una + 0xFFFF)) {
  15007. + mptcp_for_each_sk(meta_tp->mpcb, sk_it) {
  15008. + if (mptcp_sk_can_send_ack(sk_it))
  15009. + tcp_xmit_probe_skb(sk_it, 1);
  15010. + }
  15011. + }
  15012. +
  15013. + /* At least one of the tcp_xmit_probe_skb's has to succeed */
  15014. + mptcp_for_each_sk(meta_tp->mpcb, sk_it) {
  15015. + int ret;
  15016. +
  15017. + if (!mptcp_sk_can_send_ack(sk_it))
  15018. + continue;
  15019. +
  15020. + ret = tcp_xmit_probe_skb(sk_it, 0);
  15021. + if (unlikely(ret > 0))
  15022. + ans = ret;
  15023. + }
  15024. + return ans;
  15025. + }
  15026. +}
  15027. +
  15028. +static void mptcp_find_and_set_pathmask(struct sock *meta_sk, struct sk_buff *skb)
  15029. +{
  15030. + struct sk_buff *skb_it;
  15031. +
  15032. + skb_it = tcp_write_queue_head(meta_sk);
  15033. +
  15034. + tcp_for_write_queue_from(skb_it, meta_sk) {
  15035. + if (skb_it == tcp_send_head(meta_sk))
  15036. + break;
  15037. +
  15038. + if (TCP_SKB_CB(skb_it)->seq == TCP_SKB_CB(skb)->seq) {
  15039. + TCP_SKB_CB(skb)->path_mask = TCP_SKB_CB(skb_it)->path_mask;
  15040. + break;
  15041. + }
  15042. + }
  15043. +}
  15044. +
  15045. +static struct sk_buff *mptcp_rcv_buf_optimization(struct sock *sk, int penal)
  15046. +{
  15047. + struct sock *meta_sk;
  15048. + struct tcp_sock *tp = tcp_sk(sk), *tp_it;
  15049. + struct sk_buff *skb_head;
  15050. +
  15051. + if (tp->mpcb->cnt_subflows == 1)
  15052. + return NULL;
  15053. +
  15054. + meta_sk = mptcp_meta_sk(sk);
  15055. + skb_head = tcp_write_queue_head(meta_sk);
  15056. +
  15057. + if (!skb_head || skb_head == tcp_send_head(meta_sk))
  15058. + return NULL;
  15059. +
  15060. + /* If penalization is optional (coming from mptcp_next_segment()) and
  15061. + * we are not send-buffer-limited, we do not penalize. The retransmission
  15062. + * is just an optimization to fix the idle-time due to the delay before
  15063. + * we wake up the application.
  15064. + */
  15065. + if (!penal && sk_stream_memory_free(meta_sk))
  15066. + goto retrans;
  15067. +
  15068. + /* Only penalize again after an RTT has elapsed */
  15069. + if (tcp_time_stamp - tp->mptcp->last_rbuf_opti < tp->srtt >> 3)
  15070. + goto retrans;
  15071. +
  15072. + /* Half the cwnd of the slow flow */
  15073. + mptcp_for_each_tp(tp->mpcb, tp_it) {
  15074. + if (tp_it != tp &&
  15075. + TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp_it->mptcp->path_index)) {
  15076. + if (tp->srtt < tp_it->srtt && inet_csk((struct sock *)tp_it)->icsk_ca_state == TCP_CA_Open) {
  15077. + tp_it->snd_cwnd = max(tp_it->snd_cwnd >> 1U, 1U);
  15078. + if (tp_it->snd_ssthresh != TCP_INFINITE_SSTHRESH)
  15079. + tp_it->snd_ssthresh = max(tp_it->snd_ssthresh >> 1U, 2U);
  15080. +
  15081. + tp->mptcp->last_rbuf_opti = tcp_time_stamp;
  15082. + }
  15083. + break;
  15084. + }
  15085. + }
  15086. +
  15087. +retrans:
  15088. +
  15089. + /* Segment not yet injected into this path? Take it!!! */
  15090. + if (!(TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp->mptcp->path_index))) {
  15091. + bool do_retrans = false;
  15092. + mptcp_for_each_tp(tp->mpcb, tp_it) {
  15093. + if (tp_it != tp &&
  15094. + TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp_it->mptcp->path_index)) {
  15095. + if (tp_it->snd_cwnd <= 4) {
  15096. + do_retrans = true;
  15097. + break;
  15098. + }
  15099. +
  15100. + if (4 * tp->srtt >= tp_it->srtt) {
  15101. + do_retrans = false;
  15102. + break;
  15103. + } else {
  15104. + do_retrans = true;
  15105. + }
  15106. + }
  15107. + }
  15108. +
  15109. + if (do_retrans)
  15110. + return skb_head;
  15111. + }
  15112. + return NULL;
  15113. +}
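The path_mask field tested above is a per-segment bitmap of the subflows a segment has already been scheduled on. A minimal sketch of that test, assuming mptcp_pi_to_flag() maps a 1-based path index to a single bit (the helper and the path_mask member of struct tcp_skb_cb are introduced elsewhere in this patch):

	/* Illustrative only -- mirrors the retrans check in
	 * mptcp_rcv_buf_optimization(); example_pi_to_flag() is an assumed
	 * stand-in for mptcp_pi_to_flag().
	 */
	static inline u32 example_pi_to_flag(int path_index)
	{
		return 1U << (path_index - 1);
	}

	static inline bool example_seg_not_on_path(struct sk_buff *skb,
						   int path_index)
	{
		return !(TCP_SKB_CB(skb)->path_mask &
			 example_pi_to_flag(path_index));
	}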
  15114. +
  15115. +int mptcp_write_xmit(struct sock *meta_sk, unsigned int mss_now, int nonagle,
  15116. + int push_one, gfp_t gfp)
  15117. +{
  15118. + struct tcp_sock *meta_tp = tcp_sk(meta_sk), *subtp;
  15119. + struct sock *subsk;
  15120. + struct mptcp_cb *mpcb = meta_tp->mpcb;
  15121. + struct sk_buff *skb;
  15122. + unsigned int tso_segs, old_factor, sent_pkts;
  15123. + int cwnd_quota;
  15124. + int result;
  15125. + int reinject = 0;
  15126. +
  15127. + sent_pkts = 0;
  15128. +
  15129. + /* Currently mtu-probing is not done in MPTCP */
  15130. + if (!push_one && 0) {
  15131. + /* Do MTU probing. */
  15132. + result = tcp_mtu_probe(meta_sk);
  15133. + if (!result)
  15134. + return 0;
  15135. + else if (result > 0)
  15136. + sent_pkts = 1;
  15137. + }
  15138. +
  15139. + while ((skb = mptcp_next_segment(meta_sk, &reinject))) {
  15140. + unsigned int limit;
  15141. + struct sk_buff *subskb = NULL;
  15142. + u32 noneligible = mpcb->noneligible;
  15143. +
  15144. + if (reinject == 1) {
  15145. + if (!after(TCP_SKB_CB(skb)->end_seq, meta_tp->snd_una)) {
  15146. + /* Segment already reached the peer, take the next one */
  15147. + __skb_unlink(skb, &mpcb->reinject_queue);
  15148. + __kfree_skb(skb);
  15149. + continue;
  15150. + }
  15151. +
  15152. + /* Reinjection and it is coming from a subflow? We need
  15153. + * to find out the path-mask from the meta-write-queue
  15154. + * to properly select a subflow.
  15155. + */
  15156. + if (!TCP_SKB_CB(skb)->path_mask)
  15157. + mptcp_find_and_set_pathmask(meta_sk, skb);
  15158. + }
  15159. +
  15160. +subflow:
  15161. + subsk = get_available_subflow(meta_sk, skb, &mss_now);
  15162. + if (!subsk)
  15163. + break;
  15164. + subtp = tcp_sk(subsk);
  15165. +
  15166. + /* Since all subsocks are locked before calling the scheduler,
  15167. + * the tcp_send_head should not change.
  15168. + */
  15169. + BUG_ON(!reinject && tcp_send_head(meta_sk) != skb);
  15170. +retry:
  15171. + /* If the segment was cloned (e.g. a meta retransmission),
  15172. + * the header must be expanded/copied so that there is no
  15173. + * corruption of TSO information.
  15174. + */
  15175. + if (skb_unclone(skb, GFP_ATOMIC))
  15176. + break;
  15177. +
  15178. + old_factor = tcp_skb_pcount(skb);
  15179. + tcp_set_skb_tso_segs(meta_sk, skb, mss_now);
  15180. + tso_segs = tcp_skb_pcount(skb);
  15181. +
  15182. + if (reinject == -1) {
  15183. + /* The packet has already once been sent, so if we
  15184. + * change the pcount here we have to adjust packets_out
  15185. + * in the meta-sk
  15186. + */
  15187. + int diff = old_factor - tso_segs;
  15188. +
  15189. + if (diff)
  15190. + tcp_adjust_pcount(meta_sk, skb, diff);
  15191. + }
  15192. +
  15193. + cwnd_quota = tcp_cwnd_test(subtp, skb);
  15194. + if (!cwnd_quota) {
  15195. + /* May happen due to two cases:
  15196. + *
  15197. + * - if at the first selection we circumvented
  15198. + * the test due to a DATA_FIN (and got rejected at
  15199. + * tcp_snd_wnd_test), but the reinjected segment is not
  15200. + * a DATA_FIN.
  15201. + * - if we take a DATA_FIN with data, but
  15202. + * tcp_set_skb_tso_segs() increases the number of
  15203. + * tso_segs to something > 1. Then, cwnd_test might
  15204. + * reject it.
  15205. + */
  15206. + mpcb->noneligible |= mptcp_pi_to_flag(subtp->mptcp->path_index);
  15207. + continue;
  15208. + }
  15209. +
  15210. + if (!reinject && unlikely(!tcp_snd_wnd_test(meta_tp, skb, mss_now))) {
  15211. + skb = mptcp_rcv_buf_optimization(subsk, 1);
  15212. + if (skb) {
  15213. + reinject = -1;
  15214. + goto retry;
  15215. + }
  15216. + break;
  15217. + }
  15218. +
  15219. + if (tso_segs == 1) {
  15220. + if (unlikely(!tcp_nagle_test(meta_tp, skb, mss_now,
  15221. + (tcp_skb_is_last(meta_sk, skb) ?
  15222. + nonagle : TCP_NAGLE_PUSH))))
  15223. + break;
  15224. + } else {
  15225. + /* Do not try to defer the transmission of a reinjected
  15226. + * segment. Send it directly.
  15227. + * If it is not possible to send the TSO segment on the
  15228. + * best subflow right now, try to look for another subflow.
  15229. + * If there is no subflow available, defer the segment to avoid
  15230. + * the call to mptso_fragment.
  15231. + */
  15232. + if (!push_one && !reinject && tcp_tso_should_defer(subsk, skb)) {
  15233. + mpcb->noneligible |= mptcp_pi_to_flag(subtp->mptcp->path_index);
  15234. + goto subflow;
  15235. + }
  15236. + }
  15237. +
  15238. + limit = mss_now;
  15239. + if (tso_segs > 1 && !tcp_urg_mode(meta_tp))
  15240. + limit = tcp_mss_split_point(subsk, skb, mss_now,
  15241. + min_t(unsigned int,
  15242. + cwnd_quota,
  15243. + subsk->sk_gso_max_segs),
  15244. + nonagle);
  15245. +
  15246. + if (skb->len > limit &&
  15247. + unlikely(mptso_fragment(meta_sk, skb, limit, mss_now, gfp, reinject)))
  15248. + break;
  15249. +
  15250. + subskb = mptcp_skb_entail(subsk, skb, reinject);
  15251. + if (!subskb)
  15252. + break;
  15253. +
  15254. + mpcb->noneligible = noneligible;
  15255. + TCP_SKB_CB(skb)->when = tcp_time_stamp;
  15256. + TCP_SKB_CB(subskb)->when = tcp_time_stamp;
  15257. + if (unlikely(tcp_transmit_skb(subsk, subskb, 1, gfp))) {
  15258. + mptcp_transmit_skb_failed(subsk, skb, subskb);
  15259. + mpcb->noneligible |= mptcp_pi_to_flag(subtp->mptcp->path_index);
  15260. + continue;
  15261. + }
  15262. +
  15263. + if (!reinject) {
  15264. + mptcp_check_sndseq_wrap(meta_tp,
  15265. + TCP_SKB_CB(skb)->end_seq -
  15266. + TCP_SKB_CB(skb)->seq);
  15267. + tcp_event_new_data_sent(meta_sk, skb);
  15268. + }
  15269. +
  15270. + tcp_minshall_update(meta_tp, mss_now, skb);
  15271. + sent_pkts += tcp_skb_pcount(skb);
  15272. + tcp_sk(subsk)->mptcp->sent_pkts += tcp_skb_pcount(skb);
  15273. +
  15274. + mptcp_sub_event_new_data_sent(subsk, subskb, skb);
  15275. +
  15276. + if (reinject > 0) {
  15277. + __skb_unlink(skb, &mpcb->reinject_queue);
  15278. + kfree_skb(skb);
  15279. + }
  15280. +
  15281. + if (push_one)
  15282. + break;
  15283. + }
  15284. +
  15285. + mpcb->noneligible = 0;
  15286. +
  15287. + if (likely(sent_pkts)) {
  15288. + mptcp_for_each_sk(mpcb, subsk) {
  15289. + subtp = tcp_sk(subsk);
  15290. + if (subtp->mptcp->sent_pkts) {
  15291. + if (tcp_in_cwnd_reduction(subsk))
  15292. + subtp->prr_out += subtp->mptcp->sent_pkts;
  15293. + tcp_cwnd_validate(subsk);
  15294. + subtp->mptcp->sent_pkts = 0;
  15295. + }
  15296. + }
  15297. + return 0;
  15298. + }
  15299. +
  15300. + return !meta_tp->packets_out && tcp_send_head(meta_sk);
  15301. +}
  15302. +
  15303. +void mptcp_write_space(struct sock *sk)
  15304. +{
  15305. + mptcp_push_pending_frames(mptcp_meta_sk(sk));
  15306. +}
  15307. +
  15308. +u32 __mptcp_select_window(struct sock *sk)
  15309. +{
  15310. + struct inet_connection_sock *icsk = inet_csk(sk);
  15311. + struct tcp_sock *tp = tcp_sk(sk), *meta_tp = mptcp_meta_tp(tp);
  15312. + int mss, free_space, full_space, window;
  15313. +
  15314. + /* MSS for the peer's data. Previous versions used mss_clamp
  15315. + * here. I don't know if the value based on our guesses
  15316. + * of peer's MSS is better for the performance. It's more correct
  15317. + * but may be worse for the performance because of rcv_mss
  15318. + * fluctuations. --SAW 1998/11/1
  15319. + */
  15320. + mss = icsk->icsk_ack.rcv_mss;
  15321. + free_space = tcp_space(sk);
  15322. + full_space = min_t(int, meta_tp->window_clamp,
  15323. + tcp_full_space(sk));
  15324. +
  15325. + if (mss > full_space)
  15326. + mss = full_space;
  15327. +
  15328. + if (free_space < (full_space >> 1)) {
  15329. + icsk->icsk_ack.quick = 0;
  15330. +
  15331. + if (tcp_memory_pressure)
  15332. + /* TODO this has to be adapted when we support different
  15333. + * MSS's among the subflows.
  15334. + */
  15335. + meta_tp->rcv_ssthresh = min(meta_tp->rcv_ssthresh,
  15336. + 4U * meta_tp->advmss);
  15337. +
  15338. + if (free_space < mss)
  15339. + return 0;
  15340. + }
  15341. +
  15342. + if (free_space > meta_tp->rcv_ssthresh)
  15343. + free_space = meta_tp->rcv_ssthresh;
  15344. +
  15345. + /* Don't do rounding if we are using window scaling, since the
  15346. + * scaled window will not line up with the MSS boundary anyway.
  15347. + */
  15348. + window = meta_tp->rcv_wnd;
  15349. + if (tp->rx_opt.rcv_wscale) {
  15350. + window = free_space;
  15351. +
  15352. + /* Advertise enough space so that it won't get scaled away.
  15353. + * Important case: prevent zero window announcement if
  15354. + * 1<<rcv_wscale > mss.
  15355. + */
  15356. + if (((window >> tp->rx_opt.rcv_wscale) << tp->
  15357. + rx_opt.rcv_wscale) != window)
  15358. + window = (((window >> tp->rx_opt.rcv_wscale) + 1)
  15359. + << tp->rx_opt.rcv_wscale);
  15360. + } else {
  15361. + /* Get the largest window that is a nice multiple of mss.
  15362. + * Window clamp already applied above.
  15363. + * If our current window offering is within 1 mss of the
  15364. + * free space we just keep it. This prevents the divide
  15365. + * and multiply from happening most of the time.
  15366. + * We also don't do any window rounding when the free space
  15367. + * is too small.
  15368. + */
  15369. + if (window <= free_space - mss || window > free_space)
  15370. + window = (free_space / mss) * mss;
  15371. + else if (mss == full_space &&
  15372. + free_space > window + (full_space >> 1))
  15373. + window = free_space;
  15374. + }
  15375. +
  15376. + return window;
  15377. +}
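When window scaling is off, the branch above rounds the advertised window down to a multiple of the MSS unless the currently offered window is already within one MSS of the free space. A short worked illustration of that rounding (not part of the patch): with free_space = 10000 and mss = 1460 the result is (10000 / 1460) * 1460 = 8760.

	/* Illustrative only: the no-window-scaling rounding used above. */
	static u32 example_round_window_to_mss(u32 free_space, u32 mss)
	{
		return (free_space / mss) * mss;	/* 10000, 1460 -> 8760 */
	}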
  15378. +
  15379. +void mptcp_syn_options(struct sock *sk, struct tcp_out_options *opts,
  15380. + unsigned *remaining)
  15381. +{
  15382. + struct tcp_sock *tp = tcp_sk(sk);
  15383. +
  15384. + opts->options |= OPTION_MPTCP;
  15385. + if (is_master_tp(tp)) {
  15386. + opts->mptcp_options |= OPTION_MP_CAPABLE | OPTION_TYPE_SYN;
  15387. + *remaining -= MPTCP_SUB_LEN_CAPABLE_SYN_ALIGN;
  15388. + opts->mp_capable.sender_key = tp->mptcp_loc_key;
  15389. + opts->dss_csum = !!sysctl_mptcp_checksum;
  15390. + } else {
  15391. + struct mptcp_cb *mpcb = tp->mpcb;
  15392. +
  15393. + opts->mptcp_options |= OPTION_MP_JOIN | OPTION_TYPE_SYN;
  15394. + *remaining -= MPTCP_SUB_LEN_JOIN_SYN_ALIGN;
  15395. + opts->mp_join_syns.token = mpcb->mptcp_rem_token;
  15396. + opts->addr_id = tp->mptcp->loc_id;
  15397. + opts->mp_join_syns.sender_nonce = tp->mptcp->mptcp_loc_nonce;
  15398. + }
  15399. +}
  15400. +
  15401. +void mptcp_synack_options(struct request_sock *req,
  15402. + struct tcp_out_options *opts, unsigned *remaining)
  15403. +{
  15404. + struct mptcp_request_sock *mtreq;
  15405. + mtreq = mptcp_rsk(req);
  15406. +
  15407. + opts->options |= OPTION_MPTCP;
  15408. + /* MPCB not yet set - thus it's a new MPTCP-session */
  15409. + if (!mtreq->mpcb) {
  15410. + opts->mptcp_options |= OPTION_MP_CAPABLE | OPTION_TYPE_SYNACK;
  15411. + opts->mp_capable.sender_key = mtreq->mptcp_loc_key;
  15412. + opts->dss_csum = !!sysctl_mptcp_checksum || mtreq->dss_csum;
  15413. + *remaining -= MPTCP_SUB_LEN_CAPABLE_SYN_ALIGN;
  15414. + } else {
  15415. + opts->mptcp_options |= OPTION_MP_JOIN | OPTION_TYPE_SYNACK;
  15416. + opts->mp_join_syns.sender_truncated_mac =
  15417. + mtreq->mptcp_hash_tmac;
  15418. + opts->mp_join_syns.sender_nonce = mtreq->mptcp_loc_nonce;
  15419. + opts->addr_id = mtreq->loc_id;
  15420. + *remaining -= MPTCP_SUB_LEN_JOIN_SYNACK_ALIGN;
  15421. + }
  15422. +}
  15423. +
  15424. +void mptcp_established_options(struct sock *sk, struct sk_buff *skb,
  15425. + struct tcp_out_options *opts, unsigned *size)
  15426. +{
  15427. + struct tcp_sock *tp = tcp_sk(sk), *meta_tp = mptcp_meta_tp(tp);
  15428. + struct mptcp_cb *mpcb = tp->mpcb;
  15429. + struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL;
  15430. +
  15431. + /* In fallback mp_fail-mode, we have to repeat it until the fallback
  15432. + * has been done by the sender
  15433. + */
  15434. + if (unlikely(tp->mptcp->send_mp_fail)) {
  15435. + opts->options |= OPTION_MPTCP;
  15436. + opts->mptcp_options |= OPTION_MP_FAIL;
  15437. + opts->data_ack = (__u32)(mpcb->csum_cutoff_seq >> 32);
  15438. + opts->data_seq = (__u32)mpcb->csum_cutoff_seq;
  15439. + *size += MPTCP_SUB_LEN_FAIL;
  15440. + return;
  15441. + }
  15442. +
  15443. + if (unlikely(tp->send_mp_fclose)) {
  15444. + opts->options |= OPTION_MPTCP;
  15445. + opts->mptcp_options |= OPTION_MP_FCLOSE;
  15446. + opts->mp_capable.receiver_key = mpcb->mptcp_rem_key;
  15447. + *size += MPTCP_SUB_LEN_FCLOSE_ALIGN;
  15448. + return;
  15449. + }
  15450. +
  15451. + /* 1. If we are the sender of the infinite-mapping, we need the
  15452. + * MPTCPHDR_INF-flag, because a retransmission of the
  15453. + * infinite-announcement still needs the mptcp-option.
  15454. + *
  15455. + * We need infinite_cutoff_seq, because retransmissions from before
  15456. + * the infinite-cutoff-moment still need the MPTCP-signalling to stay
  15457. + * consistent.
  15458. + *
  15459. + * 2. If we are the receiver of the infinite-mapping, we always skip
  15460. + * mptcp-options, because acknowledgments from before the
  15461. + * infinite-mapping point have already been sent out.
  15462. + *
  15463. + * I know, the whole infinite-mapping stuff is ugly...
  15464. + *
  15465. + * TODO: Handle wrapped data-sequence numbers
  15466. + * (even if it's very unlikely)
  15467. + */
  15468. + if (unlikely(mpcb->infinite_mapping_snd) &&
  15469. + tp->mptcp->fully_established &&
  15470. + ((mpcb->send_infinite_mapping && tcb &&
  15471. + !(tcb->mptcp_flags & MPTCPHDR_INF) &&
  15472. + !before(tcb->seq, tp->mptcp->infinite_cutoff_seq)) ||
  15473. + !mpcb->send_infinite_mapping))
  15474. + return;
  15475. +
  15476. + if (unlikely(tp->mptcp->include_mpc)) {
  15477. + opts->options |= OPTION_MPTCP;
  15478. + opts->mptcp_options |= OPTION_MP_CAPABLE |
  15479. + OPTION_TYPE_ACK;
  15480. + *size += MPTCP_SUB_LEN_CAPABLE_ACK_ALIGN;
  15481. + opts->mp_capable.sender_key = mpcb->mptcp_loc_key;
  15482. + opts->mp_capable.receiver_key = mpcb->mptcp_rem_key;
  15483. + opts->dss_csum = mpcb->dss_csum;
  15484. +
  15485. + if (skb)
  15486. + tp->mptcp->include_mpc = 0;
  15487. + }
  15488. + if (unlikely(tp->mptcp->pre_established)) {
  15489. + opts->options |= OPTION_MPTCP;
  15490. + opts->mptcp_options |= OPTION_MP_JOIN | OPTION_TYPE_ACK;
  15491. + *size += MPTCP_SUB_LEN_JOIN_ACK_ALIGN;
  15492. + }
  15493. +
  15494. + if (!tp->mptcp->include_mpc && !tp->mptcp->pre_established) {
  15495. + opts->options |= OPTION_MPTCP;
  15496. + opts->mptcp_options |= OPTION_DATA_ACK;
  15497. + /* If !skb, we come from tcp_current_mss and thus we always
  15498. + * assume that the DSS-option will be set for the data-packet.
  15499. + */
  15500. + if (skb && !mptcp_is_data_seq(skb)) {
  15501. + opts->data_ack = meta_tp->rcv_nxt;
  15502. +
  15503. + *size += MPTCP_SUB_LEN_ACK_ALIGN;
  15504. + } else {
  15505. + opts->data_ack = meta_tp->rcv_nxt;
  15506. +
  15507. + /* Doesn't matter whether the csum is included or not. It will be
  15508. + * either 10 or 12, and thus aligned = 12
  15509. + */
  15510. + *size += MPTCP_SUB_LEN_ACK_ALIGN +
  15511. + MPTCP_SUB_LEN_SEQ_ALIGN;
  15512. + }
  15513. +
  15514. + *size += MPTCP_SUB_LEN_DSS_ALIGN;
  15515. + }
  15516. +
  15517. + if (mpcb->pm_ops->addr_signal)
  15518. + mpcb->pm_ops->addr_signal(sk, size, opts, skb);
  15519. +
  15520. + if (unlikely(tp->mptcp->send_mp_prio) &&
  15521. + MAX_TCP_OPTION_SPACE - *size >= MPTCP_SUB_LEN_PRIO_ALIGN) {
  15522. + opts->options |= OPTION_MPTCP;
  15523. + opts->mptcp_options |= OPTION_MP_PRIO;
  15524. + if (skb)
  15525. + tp->mptcp->send_mp_prio = 0;
  15526. + *size += MPTCP_SUB_LEN_PRIO_ALIGN;
  15527. + }
  15528. +
  15529. + return;
  15530. +}
  15531. +
  15532. +u16 mptcp_select_window(struct sock *sk)
  15533. +{
  15534. + u16 new_win = tcp_select_window(sk);
  15535. + struct tcp_sock *tp = tcp_sk(sk);
  15536. + struct tcp_sock *meta_tp = mptcp_meta_tp(tp);
  15537. +
  15538. + meta_tp->rcv_wnd = tp->rcv_wnd;
  15539. + meta_tp->rcv_wup = meta_tp->rcv_nxt;
  15540. +
  15541. + return new_win;
  15542. +}
  15543. +
  15544. +void mptcp_options_write(__be32 *ptr, struct tcp_sock *tp,
  15545. + struct tcp_out_options *opts,
  15546. + struct sk_buff *skb)
  15547. +{
  15548. + if (unlikely(OPTION_MP_CAPABLE & opts->mptcp_options)) {
  15549. + struct mp_capable *mpc = (struct mp_capable *)ptr;
  15550. +
  15551. + mpc->kind = TCPOPT_MPTCP;
  15552. +
  15553. + if ((OPTION_TYPE_SYN & opts->mptcp_options) ||
  15554. + (OPTION_TYPE_SYNACK & opts->mptcp_options)) {
  15555. + mpc->sender_key = opts->mp_capable.sender_key;
  15556. + mpc->len = MPTCP_SUB_LEN_CAPABLE_SYN;
  15557. + ptr += MPTCP_SUB_LEN_CAPABLE_SYN_ALIGN >> 2;
  15558. + } else if (OPTION_TYPE_ACK & opts->mptcp_options) {
  15559. + mpc->sender_key = opts->mp_capable.sender_key;
  15560. + mpc->receiver_key = opts->mp_capable.receiver_key;
  15561. + mpc->len = MPTCP_SUB_LEN_CAPABLE_ACK;
  15562. + ptr += MPTCP_SUB_LEN_CAPABLE_ACK_ALIGN >> 2;
  15563. + }
  15564. +
  15565. + mpc->sub = MPTCP_SUB_CAPABLE;
  15566. + mpc->ver = 0;
  15567. + mpc->a = opts->dss_csum;
  15568. + mpc->b = 0;
  15569. + mpc->rsv = 0;
  15570. + mpc->h = 1;
  15571. + }
  15572. +
  15573. + if (unlikely(OPTION_MP_JOIN & opts->mptcp_options)) {
  15574. + struct mp_join *mpj = (struct mp_join *)ptr;
  15575. +
  15576. + mpj->kind = TCPOPT_MPTCP;
  15577. + mpj->sub = MPTCP_SUB_JOIN;
  15578. + mpj->rsv = 0;
  15579. + mpj->addr_id = opts->addr_id;
  15580. +
  15581. + if (OPTION_TYPE_SYN & opts->mptcp_options) {
  15582. + mpj->len = MPTCP_SUB_LEN_JOIN_SYN;
  15583. + mpj->u.syn.token = opts->mp_join_syns.token;
  15584. + mpj->u.syn.nonce = opts->mp_join_syns.sender_nonce;
  15585. + mpj->b = tp->mptcp->low_prio;
  15586. + ptr += MPTCP_SUB_LEN_JOIN_SYN_ALIGN >> 2;
  15587. + } else if (OPTION_TYPE_SYNACK & opts->mptcp_options) {
  15588. + mpj->len = MPTCP_SUB_LEN_JOIN_SYNACK;
  15589. + mpj->u.synack.mac =
  15590. + opts->mp_join_syns.sender_truncated_mac;
  15591. + mpj->u.synack.nonce = opts->mp_join_syns.sender_nonce;
  15592. + mpj->b = tp->mptcp->low_prio;
  15593. + ptr += MPTCP_SUB_LEN_JOIN_SYNACK_ALIGN >> 2;
  15594. + } else if (OPTION_TYPE_ACK & opts->mptcp_options) {
  15595. + mpj->len = MPTCP_SUB_LEN_JOIN_ACK;
  15596. + memcpy(mpj->u.ack.mac, &tp->mptcp->sender_mac[0], 20);
  15597. + ptr += MPTCP_SUB_LEN_JOIN_ACK_ALIGN >> 2;
  15598. + }
  15599. + }
  15600. + if (unlikely(OPTION_ADD_ADDR & opts->mptcp_options)) {
  15601. + struct mp_add_addr *mpadd = (struct mp_add_addr *)ptr;
  15602. +
  15603. + mpadd->kind = TCPOPT_MPTCP;
  15604. + if (opts->add_addr_v4) {
  15605. + mpadd->len = MPTCP_SUB_LEN_ADD_ADDR4;
  15606. + mpadd->sub = MPTCP_SUB_ADD_ADDR;
  15607. + mpadd->ipver = 4;
  15608. + mpadd->addr_id = opts->add_addr4.addr_id;
  15609. + mpadd->u.v4.addr = opts->add_addr4.addr;
  15610. + ptr += MPTCP_SUB_LEN_ADD_ADDR4_ALIGN >> 2;
  15611. + } else if (opts->add_addr_v6) {
  15612. + mpadd->len = MPTCP_SUB_LEN_ADD_ADDR6;
  15613. + mpadd->sub = MPTCP_SUB_ADD_ADDR;
  15614. + mpadd->ipver = 6;
  15615. + mpadd->addr_id = opts->add_addr6.addr_id;
  15616. + memcpy(&mpadd->u.v6.addr, &opts->add_addr6.addr,
  15617. + sizeof(mpadd->u.v6.addr));
  15618. + ptr += MPTCP_SUB_LEN_ADD_ADDR6_ALIGN >> 2;
  15619. + }
  15620. + }
  15621. + if (unlikely(OPTION_REMOVE_ADDR & opts->mptcp_options)) {
  15622. + struct mp_remove_addr *mprem = (struct mp_remove_addr *)ptr;
  15623. + u8 *addrs_id;
  15624. + int id, len, len_align;
  15625. +
  15626. + len = mptcp_sub_len_remove_addr(opts->remove_addrs);
  15627. + len_align = mptcp_sub_len_remove_addr_align(opts->remove_addrs);
  15628. +
  15629. + mprem->kind = TCPOPT_MPTCP;
  15630. + mprem->len = len;
  15631. + mprem->sub = MPTCP_SUB_REMOVE_ADDR;
  15632. + mprem->rsv = 0;
  15633. + addrs_id = &mprem->addrs_id;
  15634. +
  15635. + mptcp_for_each_bit_set(opts->remove_addrs, id)
  15636. + *(addrs_id++) = id;
  15637. +
  15638. + /* Fill the rest with NOP's */
  15639. + if (len_align > len) {
  15640. + int i;
  15641. + for (i = 0; i < len_align - len; i++)
  15642. + *(addrs_id++) = TCPOPT_NOP;
  15643. + }
  15644. +
  15645. + ptr += len_align >> 2;
  15646. + }
  15647. + if (unlikely(OPTION_MP_FAIL & opts->mptcp_options)) {
  15648. + struct mp_fail *mpfail = (struct mp_fail *)ptr;
  15649. +
  15650. + mpfail->kind = TCPOPT_MPTCP;
  15651. + mpfail->len = MPTCP_SUB_LEN_FAIL;
  15652. + mpfail->sub = MPTCP_SUB_FAIL;
  15653. + mpfail->rsv1 = 0;
  15654. + mpfail->rsv2 = 0;
  15655. + mpfail->data_seq = htonll(((u64)opts->data_ack << 32) | opts->data_seq);
  15656. +
  15657. + ptr += MPTCP_SUB_LEN_FAIL_ALIGN >> 2;
  15658. + }
  15659. + if (unlikely(OPTION_MP_FCLOSE & opts->mptcp_options)) {
  15660. + struct mp_fclose *mpfclose = (struct mp_fclose *)ptr;
  15661. +
  15662. + mpfclose->kind = TCPOPT_MPTCP;
  15663. + mpfclose->len = MPTCP_SUB_LEN_FCLOSE;
  15664. + mpfclose->sub = MPTCP_SUB_FCLOSE;
  15665. + mpfclose->rsv1 = 0;
  15666. + mpfclose->rsv2 = 0;
  15667. + mpfclose->key = opts->mp_capable.receiver_key;
  15668. +
  15669. + ptr += MPTCP_SUB_LEN_FCLOSE_ALIGN >> 2;
  15670. + }
  15671. +
  15672. + if (OPTION_DATA_ACK & opts->mptcp_options) {
  15673. + if (!mptcp_is_data_seq(skb)) {
  15674. + struct mp_dss *mdss = (struct mp_dss *)ptr;
  15675. +
  15676. + mdss->kind = TCPOPT_MPTCP;
  15677. + mdss->sub = MPTCP_SUB_DSS;
  15678. + mdss->rsv1 = 0;
  15679. + mdss->rsv2 = 0;
  15680. + mdss->F = 0;
  15681. + mdss->m = 0;
  15682. + mdss->M = 0;
  15683. + mdss->a = 0;
  15684. + mdss->A = 1;
  15685. + mdss->len = mptcp_sub_len_dss(mdss, tp->mpcb->dss_csum);
  15686. +
  15687. + ptr++;
  15688. + *ptr++ = htonl(opts->data_ack);
  15689. + } else {
  15690. + /**** Just update the data_ack ****/
  15691. +
  15692. + /* Get pointer to data_ack-field. MPTCP is always at
  15693. + * the end of the TCP-options.
  15694. + */
  15695. + /* TODO if we allow sending 64-bit dseq's we have to change "16" */
  15696. + __be32 *dack = (__be32 *)(skb->data + (tcp_hdr(skb)->doff << 2) - 16);
  15697. +
  15698. + *dack = htonl(opts->data_ack);
  15699. + }
  15700. + }
  15701. + if (unlikely(OPTION_MP_PRIO & opts->mptcp_options)) {
  15702. + struct mp_prio *mpprio = (struct mp_prio *)ptr;
  15703. +
  15704. + mpprio->kind = TCPOPT_MPTCP;
  15705. + mpprio->len = MPTCP_SUB_LEN_PRIO;
  15706. + mpprio->sub = MPTCP_SUB_PRIO;
  15707. + mpprio->rsv = 0;
  15708. + mpprio->b = tp->mptcp->low_prio;
  15709. + mpprio->addr_id = TCPOPT_NOP;
  15710. +
  15711. + ptr += MPTCP_SUB_LEN_PRIO_ALIGN >> 2;
  15712. + }
  15713. +}
  15714. +
  15715. +/* Returns the next segment to be sent from the mptcp meta-queue.
  15716. + * (chooses the reinject queue if any segment is waiting in it, otherwise,
  15717. + * chooses the normal write queue).
  15718. + * Sets *@reinject to 1 if the returned segment comes from the
  15719. + * reinject queue. Sets it to 0 if it is the regular send-head of the meta-sk,
  15720. + * and sets it to -1 if it is a meta-level retransmission to optimize the
  15721. + * receive-buffer.
  15722. + */
  15723. +struct sk_buff *mptcp_next_segment(struct sock *meta_sk, int *reinject)
  15724. +{
  15725. + struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
  15726. + struct sk_buff *skb = NULL;
  15727. + if (reinject)
  15728. + *reinject = 0;
  15729. +
  15730. + /* If we are in fallback-mode, just take from the meta-send-queue */
  15731. + if (mpcb->infinite_mapping_snd || mpcb->send_infinite_mapping)
  15732. + return tcp_send_head(meta_sk);
  15733. +
  15734. + skb = skb_peek(&mpcb->reinject_queue);
  15735. +
  15736. + if (skb) {
  15737. + if (reinject)
  15738. + *reinject = 1;
  15739. + } else {
  15740. + skb = tcp_send_head(meta_sk);
  15741. +
  15742. + if (!skb && meta_sk->sk_socket &&
  15743. + test_bit(SOCK_NOSPACE, &meta_sk->sk_socket->flags) &&
  15744. + sk_stream_wspace(meta_sk) < sk_stream_min_wspace(meta_sk)) {
  15745. + struct sock *subsk = get_available_subflow(meta_sk, NULL, NULL);
  15746. + if (!subsk)
  15747. + return NULL;
  15748. +
  15749. + skb = mptcp_rcv_buf_optimization(subsk, 0);
  15750. + if (skb && reinject)
  15751. + *reinject = -1;
  15752. + }
  15753. + }
  15754. + return skb;
  15755. +}
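For reference, a hedged usage sketch of the *reinject contract documented above; it only restates what mptcp_write_xmit() in this file already does with the three possible values:

	static void example_next_segment(struct sock *meta_sk)
	{
		int reinject;
		struct sk_buff *skb = mptcp_next_segment(meta_sk, &reinject);

		if (!skb)
			return;
		if (reinject == 1) {
			/* from mpcb->reinject_queue: unlink and free the skb
			 * once it has been pushed to a subflow
			 */
		} else if (reinject == -1) {
			/* meta-level retransmission chosen by
			 * mptcp_rcv_buf_optimization(): already accounted in
			 * packets_out, so no tcp_event_new_data_sent()
			 */
		} else {
			/* regular send-head of the meta write queue */
		}
	}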
  15756. +
  15757. +/* Sends the DATA_FIN */
  15758. +void mptcp_send_fin(struct sock *meta_sk)
  15759. +{
  15760. + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
  15761. + struct sk_buff *skb = tcp_write_queue_tail(meta_sk);
  15762. + int mss_now;
  15763. +
  15764. + if ((1 << meta_sk->sk_state) & (TCPF_CLOSE_WAIT | TCPF_LAST_ACK))
  15765. + meta_tp->mpcb->passive_close = 1;
  15766. +
  15767. + /* Optimization, tack on the FIN if we have a queue of
  15768. + * unsent frames. But be careful about outgoing SACKS
  15769. + * and IP options.
  15770. + */
  15771. + mss_now = mptcp_current_mss(meta_sk);
  15772. +
  15773. + if (tcp_send_head(meta_sk) != NULL) {
  15774. + TCP_SKB_CB(skb)->mptcp_flags |= MPTCPHDR_FIN;
  15775. + TCP_SKB_CB(skb)->end_seq++;
  15776. + meta_tp->write_seq++;
  15777. + } else {
  15778. + /* Socket is locked, keep trying until memory is available. */
  15779. + for (;;) {
  15780. + skb = alloc_skb_fclone(MAX_TCP_HEADER,
  15781. + meta_sk->sk_allocation);
  15782. + if (skb)
  15783. + break;
  15784. + yield();
  15785. + }
  15786. + /* Reserve space for headers and prepare control bits. */
  15787. + skb_reserve(skb, MAX_TCP_HEADER);
  15788. +
  15789. + tcp_init_nondata_skb(skb, meta_tp->write_seq, TCPHDR_ACK);
  15790. + TCP_SKB_CB(skb)->end_seq++;
  15791. + TCP_SKB_CB(skb)->mptcp_flags |= MPTCPHDR_FIN | MPTCPHDR_SEQ;
  15792. + tcp_queue_skb(meta_sk, skb);
  15793. + }
  15794. + __tcp_push_pending_frames(meta_sk, mss_now, TCP_NAGLE_OFF);
  15795. +}
  15796. +
  15797. +void mptcp_send_active_reset(struct sock *meta_sk, gfp_t priority)
  15798. +{
  15799. + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
  15800. + struct mptcp_cb *mpcb = meta_tp->mpcb;
  15801. + struct sock *sk = NULL, *sk_it = NULL, *tmpsk;
  15802. +
  15803. + if (!mpcb->cnt_subflows)
  15804. + return;
  15805. +
  15806. + WARN_ON(meta_tp->send_mp_fclose);
  15807. +
  15808. + /* First - select a socket */
  15809. + sk = mptcp_select_ack_sock(meta_sk, 0);
  15810. +
  15811. + /* May happen if no subflow is in an appropriate state */
  15812. + if (!sk)
  15813. + return;
  15814. +
  15815. + /* We are in infinite mode - just send a reset */
  15816. + if (mpcb->infinite_mapping_snd || mpcb->infinite_mapping_rcv) {
  15817. + sk->sk_err = ECONNRESET;
  15818. + if (tcp_need_reset(sk->sk_state))
  15819. + tcp_send_active_reset(sk, priority);
  15820. + mptcp_sub_force_close(sk);
  15821. + return;
  15822. + }
  15823. +
  15824. +
  15825. + tcp_sk(sk)->send_mp_fclose = 1;
  15826. + /* Reset all other subflows */
  15827. +
  15828. + /* tcp_done must be handled with bh disabled */
  15829. + if (!in_serving_softirq())
  15830. + local_bh_disable();
  15831. +
  15832. + mptcp_for_each_sk_safe(mpcb, sk_it, tmpsk) {
  15833. + if (tcp_sk(sk_it)->send_mp_fclose)
  15834. + continue;
  15835. +
  15836. + sk_it->sk_err = ECONNRESET;
  15837. + if (tcp_need_reset(sk_it->sk_state))
  15838. + tcp_send_active_reset(sk_it, GFP_ATOMIC);
  15839. + mptcp_sub_force_close(sk_it);
  15840. + }
  15841. +
  15842. + if (!in_serving_softirq())
  15843. + local_bh_enable();
  15844. +
  15845. + tcp_send_ack(sk);
  15846. + inet_csk_reset_keepalive_timer(sk, inet_csk(sk)->icsk_rto);
  15847. +
  15848. + meta_tp->send_mp_fclose = 1;
  15849. +}
  15850. +
  15851. +static void mptcp_ack_retransmit_timer(struct sock *sk)
  15852. +{
  15853. + struct sk_buff *skb;
  15854. + struct tcp_sock *tp = tcp_sk(sk);
  15855. + struct inet_connection_sock *icsk = inet_csk(sk);
  15856. +
  15857. + if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
  15858. + goto out; /* Routing failure or similar */
  15859. +
  15860. + if (!tp->retrans_stamp)
  15861. + tp->retrans_stamp = tcp_time_stamp ? : 1;
  15862. +
  15863. + if (tcp_write_timeout(sk)) {
  15864. + tp->mptcp->pre_established = 0;
  15865. + sk_stop_timer(sk, &tp->mptcp->mptcp_ack_timer);
  15866. + tcp_send_active_reset(sk, GFP_ATOMIC);
  15867. + goto out;
  15868. + }
  15869. +
  15870. + skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
  15871. + if (skb == NULL) {
  15872. + sk_reset_timer(sk, &tp->mptcp->mptcp_ack_timer,
  15873. + jiffies + icsk->icsk_rto);
  15874. + return;
  15875. + }
  15876. +
  15877. + /* Reserve space for headers and prepare control bits */
  15878. + skb_reserve(skb, MAX_TCP_HEADER);
  15879. + tcp_init_nondata_skb(skb, tp->snd_una, TCPHDR_ACK);
  15880. +
  15881. + TCP_SKB_CB(skb)->when = tcp_time_stamp;
  15882. + if (tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC) > 0) {
  15883. + /* Retransmission failed because of local congestion,
  15884. + * do not backoff.
  15885. + */
  15886. + if (!icsk->icsk_retransmits)
  15887. + icsk->icsk_retransmits = 1;
  15888. + sk_reset_timer(sk, &tp->mptcp->mptcp_ack_timer,
  15889. + jiffies + icsk->icsk_rto);
  15890. + return;
  15891. + }
  15892. +
  15893. +
  15894. + icsk->icsk_retransmits++;
  15895. + icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
  15896. + sk_reset_timer(sk, &tp->mptcp->mptcp_ack_timer,
  15897. + jiffies + icsk->icsk_rto);
  15898. + if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1, 0, 0)) {
  15899. + __sk_dst_reset(sk);
  15900. + }
  15901. +
  15902. +out:;
  15903. +}
  15904. +
  15905. +void mptcp_ack_handler(unsigned long data)
  15906. +{
  15907. + struct sock *sk = (struct sock *)data;
  15908. + struct sock *meta_sk = mptcp_meta_sk(sk);
  15909. +
  15910. + bh_lock_sock(meta_sk);
  15911. + if (sock_owned_by_user(meta_sk)) {
  15912. + /* Try again later */
  15913. + sk_reset_timer(sk, &tcp_sk(sk)->mptcp->mptcp_ack_timer,
  15914. + jiffies + (HZ / 20));
  15915. + goto out_unlock;
  15916. + }
  15917. +
  15918. + if (sk->sk_state == TCP_CLOSE)
  15919. + goto out_unlock;
  15920. +
  15921. + mptcp_ack_retransmit_timer(sk);
  15922. +
  15923. + sk_mem_reclaim(sk);
  15924. +
  15925. +out_unlock:
  15926. + bh_unlock_sock(meta_sk);
  15927. + sock_put(sk);
  15928. +}
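mptcp_ack_handler() receives the subflow socket as its timer payload and drops the reference taken when the timer was armed. A hedged sketch of how such a timer would typically be wired up on Linux 3.14 (the real initialization lives elsewhere in this patch; setup_timer() and sk_reset_timer() are standard kernel APIs):

	/* Illustrative only. */
	static void example_arm_mptcp_ack_timer(struct sock *sk)
	{
		struct tcp_sock *tp = tcp_sk(sk);

		setup_timer(&tp->mptcp->mptcp_ack_timer, mptcp_ack_handler,
			    (unsigned long)sk);
		/* sk_reset_timer() holds a reference on sk, matching the
		 * sock_put() at the end of mptcp_ack_handler().
		 */
		sk_reset_timer(sk, &tp->mptcp->mptcp_ack_timer,
			       jiffies + inet_csk(sk)->icsk_rto);
	}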
  15929. +
  15930. +/* Similar to tcp_retransmit_skb
  15931. + *
  15932. + * The diff is that we handle the retransmission-stats (retrans_stamp) at the
  15933. + * meta-level.
  15934. + */
  15935. +int mptcp_retransmit_skb(struct sock *meta_sk, struct sk_buff *skb)
  15936. +{
  15937. + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
  15938. + struct sock *subsk;
  15939. + struct sk_buff *subskb;
  15940. + unsigned int limit, tso_segs, mss_now;
  15941. + int err = -1, oldpcount;
  15942. +
  15943. + /* Do not send more than we queued. 1/4 is reserved for possible
  15944. + * copying overhead: fragmentation, tunneling, mangling etc.
  15945. + *
  15946. + * This is a meta-retransmission thus we check on the meta-socket.
  15947. + */
  15948. + if (atomic_read(&meta_sk->sk_wmem_alloc) >
  15949. + min(meta_sk->sk_wmem_queued + (meta_sk->sk_wmem_queued >> 2), meta_sk->sk_sndbuf)) {
  15950. + return -EAGAIN;
  15951. + }
  15952. +
  15953. + /* We need to make sure that the retransmitted segment can be sent on a
  15954. + * subflow right now. If it is too big, it needs to be fragmented.
  15955. + */
  15956. + subsk = get_available_subflow(meta_sk, skb, &mss_now);
  15957. + if (!subsk) {
  15958. + /* We want to increase icsk_retransmits, thus return 0, so that
  15959. + * mptcp_retransmit_timer enters the desired branch.
  15960. + */
  15961. + err = 0;
  15962. + goto failed;
  15963. + }
  15964. +
  15965. + /* If the segment was cloned (e.g. a meta retransmission), the header
  15966. + * must be expanded/copied so that there is no corruption of TSO
  15967. + * information.
  15968. + */
  15969. + if (skb_unclone(skb, GFP_ATOMIC)) {
  15970. + err = ENOMEM;
  15971. + goto failed;
  15972. + }
  15973. +
  15974. + oldpcount = tcp_skb_pcount(skb);
  15975. + tcp_set_skb_tso_segs(meta_sk, skb, mss_now);
  15976. + tso_segs = tcp_skb_pcount(skb);
  15977. + BUG_ON(!tso_segs);
  15978. +
  15979. + /* The MSS might have changed and so the number of segments. We
  15980. + * need to account for this change.
  15981. + */
  15982. + if (unlikely(oldpcount != tso_segs))
  15983. + tcp_adjust_pcount(meta_sk, skb, oldpcount - tso_segs);
  15984. +
  15985. + limit = mss_now;
  15986. + if (tso_segs > 1 && !tcp_urg_mode(meta_tp))
  15987. + limit = tcp_mss_split_point(subsk, skb, mss_now,
  15988. + min_t(unsigned int,
  15989. + tcp_cwnd_test(tcp_sk(subsk), skb),
  15990. + subsk->sk_gso_max_segs),
  15991. + TCP_NAGLE_OFF);
  15992. +
  15993. + if (skb->len > limit &&
  15994. + unlikely(mptso_fragment(meta_sk, skb, limit, mss_now,
  15995. + GFP_ATOMIC, 0)))
  15996. + goto failed;
  15997. +
  15998. + subskb = mptcp_skb_entail(subsk, skb, -1);
  15999. + if (!subskb)
  16000. + goto failed;
  16001. +
  16002. + TCP_SKB_CB(skb)->when = tcp_time_stamp;
  16003. + TCP_SKB_CB(subskb)->when = tcp_time_stamp;
  16004. + err = tcp_transmit_skb(subsk, subskb, 1, GFP_ATOMIC);
  16005. + if (!err) {
  16006. + /* Update global TCP statistics. */
  16007. + TCP_INC_STATS(sock_net(meta_sk), TCP_MIB_RETRANSSEGS);
  16008. +
  16009. + /* Diff to tcp_retransmit_skb */
  16010. +
  16011. + /* Save stamp of the first retransmit. */
  16012. + if (!meta_tp->retrans_stamp)
  16013. + meta_tp->retrans_stamp = TCP_SKB_CB(subskb)->when;
  16014. + mptcp_sub_event_new_data_sent(subsk, subskb, skb);
  16015. + } else {
  16016. + mptcp_transmit_skb_failed(subsk, skb, subskb);
  16017. + }
  16018. +
  16019. +failed:
  16020. + return err;
  16021. +}
  16022. +
  16023. +/* Similar to tcp_retransmit_timer
  16024. + *
  16025. + * The diff is that we have to handle retransmissions of the FAST_CLOSE-message
  16026. + * and that we don't have an srtt estimation at the meta-level.
  16027. + */
  16028. +void mptcp_retransmit_timer(struct sock *meta_sk)
  16029. +{
  16030. + struct tcp_sock *meta_tp = tcp_sk(meta_sk);
  16031. + struct mptcp_cb *mpcb = meta_tp->mpcb;
  16032. + struct inet_connection_sock *meta_icsk = inet_csk(meta_sk);
  16033. + int err;
  16034. +
  16035. + /* In fallback, retransmission is handled at the subflow-level */
  16036. + if (!meta_tp->packets_out || mpcb->infinite_mapping_snd ||
  16037. + mpcb->send_infinite_mapping)
  16038. + return;
  16039. +
  16040. + WARN_ON(tcp_write_queue_empty(meta_sk));
  16041. +
  16042. + if (!meta_tp->snd_wnd && !sock_flag(meta_sk, SOCK_DEAD) &&
  16043. + !((1 << meta_sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {
  16044. + /* Receiver dastardly shrinks window. Our retransmits
  16045. + * become zero probes, but we should not timeout this
  16046. + * connection. If the socket is an orphan, time it out,
  16047. + * we cannot allow such beasts to hang infinitely.
  16048. + */
  16049. + struct inet_sock *meta_inet = inet_sk(meta_sk);
  16050. + if (meta_sk->sk_family == AF_INET) {
  16051. + LIMIT_NETDEBUG(KERN_DEBUG "MPTCP: Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
  16052. + &meta_inet->inet_daddr,
  16053. + ntohs(meta_inet->inet_dport),
  16054. + meta_inet->inet_num, meta_tp->snd_una,
  16055. + meta_tp->snd_nxt);
  16056. + }
  16057. +#if IS_ENABLED(CONFIG_IPV6)
  16058. + else if (meta_sk->sk_family == AF_INET6) {
  16059. + LIMIT_NETDEBUG(KERN_DEBUG "MPTCP: Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
  16060. + &meta_sk->sk_v6_daddr,
  16061. + ntohs(meta_inet->inet_dport),
  16062. + meta_inet->inet_num, meta_tp->snd_una,
  16063. + meta_tp->snd_nxt);
  16064. + }
  16065. +#endif
  16066. + if (tcp_time_stamp - meta_tp->rcv_tstamp > TCP_RTO_MAX) {
  16067. + tcp_write_err(meta_sk);
  16068. + return;
  16069. + }
  16070. +
  16071. + mptcp_retransmit_skb(meta_sk, tcp_write_queue_head(meta_sk));
  16072. + goto out_reset_timer;
  16073. + }
  16074. +
  16075. + if (tcp_write_timeout(meta_sk))
  16076. + return;
  16077. +
  16078. + if (meta_icsk->icsk_retransmits == 0)
  16079. + NET_INC_STATS_BH(sock_net(meta_sk), LINUX_MIB_TCPTIMEOUTS);
  16080. +
  16081. + meta_icsk->icsk_ca_state = TCP_CA_Loss;
  16082. +
  16083. + err = mptcp_retransmit_skb(meta_sk, tcp_write_queue_head(meta_sk));
  16084. + if (err > 0) {
  16085. + /* Retransmission failed because of local congestion,
  16086. + * do not backoff.
  16087. + */
  16088. + if (!meta_icsk->icsk_retransmits)
  16089. + meta_icsk->icsk_retransmits = 1;
  16090. + inet_csk_reset_xmit_timer(meta_sk, ICSK_TIME_RETRANS,
  16091. + min(meta_icsk->icsk_rto, TCP_RESOURCE_PROBE_INTERVAL),
  16092. + TCP_RTO_MAX);
  16093. + return;
  16094. + }
  16095. +
  16096. + /* Increase the timeout each time we retransmit. Note that
  16097. + * we do not increase the rtt estimate. rto is initialized
  16098. + * from rtt, but increases here. Jacobson (SIGCOMM 88) suggests
  16099. + * that doubling rto each time is the least we can get away with.
  16100. + * In KA9Q, Karn uses this for the first few times, and then
  16101. + * goes to quadratic. netBSD doubles, but only goes up to *64,
  16102. + * and clamps at 1 to 64 sec afterwards. Note that 120 sec is
  16103. + * defined in the protocol as the maximum possible RTT. I guess
  16104. + * we'll have to use something other than TCP to talk to the
  16105. + * University of Mars.
  16106. + *
  16107. + * PAWS allows us longer timeouts and large windows, so once
  16108. + * implemented ftp to mars will work nicely. We will have to fix
  16109. + * the 120 second clamps though!
  16110. + */
  16111. + meta_icsk->icsk_backoff++;
  16112. + meta_icsk->icsk_retransmits++;
  16113. +
  16114. +out_reset_timer:
  16115. + /* If stream is thin, use linear timeouts. Since 'icsk_backoff' is
  16116. + * used to reset timer, set to 0. Recalculate 'icsk_rto' as this
  16117. + * might be increased if the stream oscillates between thin and thick,
  16118. + * thus the old value might already be too high compared to the value
  16119. + * set by 'tcp_set_rto' in tcp_input.c which resets the rto without
  16120. + * backoff. Limit to TCP_THIN_LINEAR_RETRIES before initiating
  16121. + * exponential backoff behaviour, to avoid continuing to hammer
  16122. + * linear-timeout retransmissions into a black hole.
  16123. + */
  16124. + if (meta_sk->sk_state == TCP_ESTABLISHED &&
  16125. + (meta_tp->thin_lto || sysctl_tcp_thin_linear_timeouts) &&
  16126. + tcp_stream_is_thin(meta_tp) &&
  16127. + meta_icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
  16128. + meta_icsk->icsk_backoff = 0;
  16129. + /* We cannot do the same as in tcp_write_timer because the
  16130. + * srtt is not set here.
  16131. + */
  16132. + mptcp_set_rto(meta_sk);
  16133. + } else {
  16134. + /* Use normal (exponential) backoff */
  16135. + meta_icsk->icsk_rto = min(meta_icsk->icsk_rto << 1, TCP_RTO_MAX);
  16136. + }
  16137. + inet_csk_reset_xmit_timer(meta_sk, ICSK_TIME_RETRANS, meta_icsk->icsk_rto, TCP_RTO_MAX);
  16138. +
  16139. + return;
  16140. +}
  16141. +
  16142. +/* Modify values to an mptcp-level for the initial window of new subflows */
  16143. +void mptcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd,
  16144. + __u32 *window_clamp, int wscale_ok,
  16145. + __u8 *rcv_wscale, __u32 init_rcv_wnd,
  16146. + const struct sock *sk)
  16147. +{
  16148. + struct mptcp_cb *mpcb = tcp_sk(sk)->mpcb;
  16149. +
  16150. + *window_clamp = mpcb->orig_window_clamp;
  16151. + __space = tcp_win_from_space(mpcb->orig_sk_rcvbuf);
  16152. +
  16153. + tcp_select_initial_window(__space, mss, rcv_wnd, window_clamp,
  16154. + wscale_ok, rcv_wscale, init_rcv_wnd, sk);
  16155. +}
  16156. +
  16157. +unsigned int mptcp_current_mss(struct sock *meta_sk)
  16158. +{
  16159. + unsigned int mss = 0;
  16160. + struct sock *sk;
  16161. +
  16162. + mptcp_for_each_sk(tcp_sk(meta_sk)->mpcb, sk) {
  16163. + int this_mss;
  16164. +
  16165. + if (!mptcp_sk_can_send(sk))
  16166. + continue;
  16167. +
  16168. + this_mss = tcp_current_mss(sk);
  16169. + if (this_mss > mss)
  16170. + mss = this_mss;
  16171. + }
  16172. +
  16173. + /* If no subflow is available, we take a default-mss from the
  16174. + * meta-socket.
  16175. + */
  16176. + return !mss ? tcp_current_mss(meta_sk) : mss;
  16177. +}
  16178. +
  16179. +int mptcp_select_size(const struct sock *meta_sk, bool sg)
  16180. +{
  16181. + int mss = 0; /* We look for the smallest MSS */
  16182. + struct sock *sk;
  16183. +
  16184. + mptcp_for_each_sk(tcp_sk(meta_sk)->mpcb, sk) {
  16185. + int this_mss;
  16186. +
  16187. + if (!mptcp_sk_can_send(sk))
  16188. + continue;
  16189. +
  16190. + this_mss = tcp_sk(sk)->mss_cache;
  16191. + if (this_mss > mss)
  16192. + mss = this_mss;
  16193. + }
  16194. +
  16195. + if (sg) {
  16196. + if (mptcp_sk_can_gso(meta_sk)) {
  16197. + mss = SKB_WITH_OVERHEAD(2048 - MAX_TCP_HEADER);
  16198. + } else {
  16199. + int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER);
  16200. +
  16201. + if (mss >= pgbreak &&
  16202. + mss <= pgbreak + (MAX_SKB_FRAGS - 1) * PAGE_SIZE)
  16203. + mss = pgbreak;
  16204. + }
  16205. + }
  16206. +
  16207. + return !mss ? tcp_sk(meta_sk)->mss_cache : mss;
  16208. +}
  16209. +
  16210. +int mptcp_check_snd_buf(const struct tcp_sock *tp)
  16211. +{
  16212. + struct sock *sk;
  16213. + u32 rtt_max = tp->srtt;
  16214. + u64 bw_est;
  16215. +
  16216. + if (!tp->srtt)
  16217. + return tp->reordering + 1;
  16218. +
  16219. + mptcp_for_each_sk(tp->mpcb, sk) {
  16220. + if (!mptcp_sk_can_send(sk))
  16221. + continue;
  16222. +
  16223. + if (rtt_max < tcp_sk(sk)->srtt)
  16224. + rtt_max = tcp_sk(sk)->srtt;
  16225. + }
  16226. +
  16227. + bw_est = div64_u64(((u64)tp->snd_cwnd * rtt_max) << 16,
  16228. + (u64)tp->srtt);
  16229. +
  16230. + return max_t(unsigned int, (u32)(bw_est >> 16),
  16231. + tp->reordering + 1);
  16232. +
  16233. +}
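The estimate above scales the subflow's cwnd by the worst-case RTT across all established subflows, i.e. roughly snd_cwnd * rtt_max / srtt packets, never less than reordering + 1. A small worked illustration (not from the patch): snd_cwnd = 10, srtt = 100 and rtt_max = 300 yields 30.

	/* Illustrative only: same arithmetic as mptcp_check_snd_buf(). */
	static u32 example_snd_buf_estimate(u32 snd_cwnd, u32 srtt, u32 rtt_max,
					    u32 reordering)
	{
		u64 bw_est = div64_u64(((u64)snd_cwnd * rtt_max) << 16,
				       (u64)srtt);

		return max_t(u32, (u32)(bw_est >> 16), reordering + 1);
	}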
  16234. +
  16235. +unsigned int mptcp_xmit_size_goal(struct sock *meta_sk, u32 mss_now,
  16236. + int large_allowed)
  16237. +{
  16238. + struct sock *sk;
  16239. + u32 xmit_size_goal = 0;
  16240. +
  16241. + if (large_allowed && mptcp_sk_can_gso(meta_sk)) {
  16242. + mptcp_for_each_sk(tcp_sk(meta_sk)->mpcb, sk) {
  16243. + int this_size_goal;
  16244. +
  16245. + if (!mptcp_sk_can_send(sk))
  16246. + continue;
  16247. +
  16248. + this_size_goal = tcp_xmit_size_goal(sk, mss_now, 1);
  16249. + if (this_size_goal > xmit_size_goal)
  16250. + xmit_size_goal = this_size_goal;
  16251. + }
  16252. + }
  16253. +
  16254. + return max(xmit_size_goal, mss_now);
  16255. +}
  16256. +
  16257. +/* Similar to tcp_trim_head - but we correctly copy the DSS-option */
  16258. +int mptcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
  16259. +{
  16260. + int dsslen = MPTCP_SUB_LEN_DSS_ALIGN + MPTCP_SUB_LEN_ACK_ALIGN +
  16261. + MPTCP_SUB_LEN_SEQ_ALIGN;
  16262. + char dss[dsslen];
  16263. +
  16264. + /* DSS-option must be recovered afterwards. */
  16265. + memcpy(dss, skb->data - dsslen, dsslen);
  16266. +
  16267. + if (skb_cloned(skb)) {
  16268. + /* pskb_expand_head will delete our DSS-option. We have to copy
  16269. + * it back if pskb_expand_head succeeds.
  16270. + */
  16271. +
  16272. + if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
  16273. + return -ENOMEM;
  16274. +
  16275. + memcpy(skb->data - dsslen, dss, dsslen);
  16276. + }
  16277. +
  16278. + __pskb_trim_head(skb, len);
  16279. +
  16280. + /* Put the DSS-option back in our header */
  16281. + memcpy(skb->data - dsslen, dss, dsslen);
  16282. +
  16283. + TCP_SKB_CB(skb)->seq += len;
  16284. + skb->ip_summed = CHECKSUM_PARTIAL;
  16285. +
  16286. + skb->truesize -= len;
  16287. + sk->sk_wmem_queued -= len;
  16288. + sk_mem_uncharge(sk, len);
  16289. + sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
  16290. +
  16291. + /* Any change of skb->len requires recalculation of tso factor. */
  16292. + if (tcp_skb_pcount(skb) > 1)
  16293. + tcp_set_skb_tso_segs(sk, skb, tcp_skb_mss(skb));
  16294. +
  16295. + return 0;
  16296. +}
  16297. diff -Nur linux-3.14.45.orig/net/mptcp/mptcp_pm.c linux-3.14.45/net/mptcp/mptcp_pm.c
  16298. --- linux-3.14.45.orig/net/mptcp/mptcp_pm.c 1970-01-01 01:00:00.000000000 +0100
  16299. +++ linux-3.14.45/net/mptcp/mptcp_pm.c 2015-06-24 14:15:48.931862523 +0200
  16300. @@ -0,0 +1,170 @@
  16301. +/*
  16302. + * MPTCP implementation - MPTCP-subflow-management
  16303. + *
  16304. + * Initial Design & Implementation:
  16305. + * Sébastien Barré <sebastien.barre@uclouvain.be>
  16306. + *
  16307. + * Current Maintainer & Author:
  16308. + * Christoph Paasch <christoph.paasch@uclouvain.be>
  16309. + *
  16310. + * Additional authors:
  16311. + * Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
  16312. + * Gregory Detal <gregory.detal@uclouvain.be>
  16313. + * Fabien Duchêne <fabien.duchene@uclouvain.be>
  16314. + * Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
  16315. + * Lavkesh Lahngir <lavkesh51@gmail.com>
  16316. + * Andreas Ripke <ripke@neclab.eu>
  16317. + * Vlad Dogaru <vlad.dogaru@intel.com>
  16318. + * Octavian Purdila <octavian.purdila@intel.com>
  16319. + * John Ronan <jronan@tssg.org>
  16320. + * Catalin Nicutar <catalin.nicutar@gmail.com>
  16321. + * Brandon Heller <brandonh@stanford.edu>
  16322. + *
  16323. + *
  16324. + * This program is free software; you can redistribute it and/or
  16325. + * modify it under the terms of the GNU General Public License
  16326. + * as published by the Free Software Foundation; either version
  16327. + * 2 of the License, or (at your option) any later version.
  16328. + */
  16329. +
  16330. +
  16331. +#include <linux/module.h>
  16332. +#include <net/mptcp.h>
  16333. +
  16334. +static DEFINE_SPINLOCK(mptcp_pm_list_lock);
  16335. +static LIST_HEAD(mptcp_pm_list);
  16336. +
  16337. +static int mptcp_default_index(sa_family_t family, union inet_addr *addr,
  16338. + struct net *net)
  16339. +{
  16340. + return 0;
  16341. +}
  16342. +
  16343. +struct mptcp_pm_ops mptcp_pm_default = {
  16344. + .get_local_index = mptcp_default_index,
  16345. + .get_local_id = mptcp_default_index, /* We do not care */
  16346. + .name = "default",
  16347. + .owner = THIS_MODULE,
  16348. +};
  16349. +
  16350. +static struct mptcp_pm_ops *mptcp_pm_find(const char *name)
  16351. +{
  16352. + struct mptcp_pm_ops *e;
  16353. +
  16354. + list_for_each_entry_rcu(e, &mptcp_pm_list, list) {
  16355. + if (strcmp(e->name, name) == 0)
  16356. + return e;
  16357. + }
  16358. +
  16359. + return NULL;
  16360. +}
  16361. +
  16362. +int mptcp_register_path_manager(struct mptcp_pm_ops *pm)
  16363. +{
  16364. + int ret = 0;
  16365. +
  16366. + if (!pm->get_local_index || !pm->get_local_id)
  16367. + return -EINVAL;
  16368. +
  16369. + spin_lock(&mptcp_pm_list_lock);
  16370. + if (mptcp_pm_find(pm->name)) {
  16371. + pr_notice("%s already registered\n", pm->name);
  16372. + ret = -EEXIST;
  16373. + } else {
  16374. + list_add_tail_rcu(&pm->list, &mptcp_pm_list);
  16375. + pr_info("%s registered\n", pm->name);
  16376. + }
  16377. + spin_unlock(&mptcp_pm_list_lock);
  16378. +
  16379. + return ret;
  16380. +}
  16381. +EXPORT_SYMBOL_GPL(mptcp_register_path_manager);
  16382. +
  16383. +void mptcp_unregister_path_manager(struct mptcp_pm_ops *pm)
  16384. +{
  16385. + spin_lock(&mptcp_pm_list_lock);
  16386. + list_del_rcu(&pm->list);
  16387. + spin_unlock(&mptcp_pm_list_lock);
  16388. +}
  16389. +EXPORT_SYMBOL_GPL(mptcp_unregister_path_manager);
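With the two exported entry points above, an out-of-tree path manager only has to fill in a struct mptcp_pm_ops and register it from its module init. A hedged sketch, modeled on mptcp_pm_default; all example_* names are hypothetical and only the fields this file touches are shown:

	static int example_get_local_index(sa_family_t family,
					   union inet_addr *addr,
					   struct net *net)
	{
		return 0;	/* always index 0, like the default manager */
	}

	static struct mptcp_pm_ops example_pm = {
		.get_local_index	= example_get_local_index,
		.get_local_id		= example_get_local_index,
		.name			= "example",
		.owner			= THIS_MODULE,
	};

	static int __init example_pm_init(void)
	{
		return mptcp_register_path_manager(&example_pm);
	}

	static void __exit example_pm_exit(void)
	{
		mptcp_unregister_path_manager(&example_pm);
	}

	module_init(example_pm_init);
	module_exit(example_pm_exit);
	MODULE_LICENSE("GPL");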
  16390. +
  16391. +void mptcp_get_default_path_manager(char *name)
  16392. +{
  16393. + struct mptcp_pm_ops *pm;
  16394. +
  16395. + BUG_ON(list_empty(&mptcp_pm_list));
  16396. +
  16397. + rcu_read_lock();
  16398. + pm = list_entry(mptcp_pm_list.next, struct mptcp_pm_ops, list);
  16399. + strncpy(name, pm->name, MPTCP_PM_NAME_MAX);
  16400. + rcu_read_unlock();
  16401. +}
  16402. +
  16403. +int mptcp_set_default_path_manager(const char *name)
  16404. +{
  16405. + struct mptcp_pm_ops *pm;
  16406. + int ret = -ENOENT;
  16407. +
  16408. + spin_lock(&mptcp_pm_list_lock);
  16409. + pm = mptcp_pm_find(name);
  16410. +#ifdef CONFIG_MODULES
  16411. + if (!pm && capable(CAP_NET_ADMIN)) {
  16412. + spin_unlock(&mptcp_pm_list_lock);
  16413. +
  16414. + request_module("mptcp_%s", name);
  16415. + spin_lock(&mptcp_pm_list_lock);
  16416. + pm = mptcp_pm_find(name);
  16417. + }
  16418. +#endif
  16419. +
  16420. + if (pm) {
  16421. + list_move(&pm->list, &mptcp_pm_list);
  16422. + ret = 0;
  16423. + } else {
  16424. + pr_info("%s is not available\n", name);
  16425. + }
  16426. + spin_unlock(&mptcp_pm_list_lock);
  16427. +
  16428. + return ret;
  16429. +}
  16430. +
  16431. +void mptcp_init_path_manager(struct mptcp_cb *mpcb)
  16432. +{
  16433. + struct mptcp_pm_ops *pm;
  16434. +
  16435. + rcu_read_lock();
  16436. + list_for_each_entry_rcu(pm, &mptcp_pm_list, list) {
  16437. + if (try_module_get(pm->owner)) {
  16438. + mpcb->pm_ops = pm;
  16439. + break;
  16440. + }
  16441. + }
  16442. + rcu_read_unlock();
  16443. +}
  16444. +
  16445. +/* Manage refcounts on socket close. */
  16446. +void mptcp_cleanup_path_manager(struct mptcp_cb *mpcb)
  16447. +{
  16448. + module_put(mpcb->pm_ops->owner);
  16449. +}
  16450. +
  16451. +/* Fallback to the default path-manager. */
  16452. +void mptcp_fallback_default(struct mptcp_cb *mpcb)
  16453. +{
  16454. + struct mptcp_pm_ops *pm;
  16455. +
  16456. + mptcp_cleanup_path_manager(mpcb);
  16457. + pm = mptcp_pm_find("default");
  16458. +
  16459. + /* Cannot fail - it's the default module */
  16460. + try_module_get(pm->owner);
  16461. + mpcb->pm_ops = pm;
  16462. +}
  16463. +EXPORT_SYMBOL_GPL(mptcp_fallback_default);
  16464. +
  16465. +/* Set default value from kernel configuration at bootup */
  16466. +static int __init mptcp_path_manager_default(void)
  16467. +{
  16468. + return mptcp_set_default_path_manager(CONFIG_DEFAULT_MPTCP_PM);
  16469. +}
  16470. +late_initcall(mptcp_path_manager_default);
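The registration API above mirrors the tcp_congestion_ops pattern. As a minimal sketch (not part of this patch), an out-of-tree path manager could register itself as follows; the callback prototypes and every "example_*" name are assumptions for illustration, the real declarations live in <net/mptcp.h>:

#include <linux/module.h>
#include <net/mptcp.h>

/* Prototypes assumed for illustration only - take the real ones from
 * <net/mptcp.h>.  Both callbacks are mandatory, see the check in
 * mptcp_register_path_manager() above. */
static int example_get_local_index(sa_family_t family, union inet_addr *addr,
				   struct net *net)
{
	return 0;	/* assumption: index of the local address to use */
}

static int example_get_local_id(sa_family_t family, union inet_addr *addr,
				struct net *net)
{
	return 0;	/* assumption: address-id announced for this address */
}

static struct mptcp_pm_ops example_pm __read_mostly = {
	.get_local_index	= example_get_local_index,
	.get_local_id		= example_get_local_id,
	.name			= "example",
	.owner			= THIS_MODULE,
};

static int __init example_pm_register(void)
{
	/* Fails with -EEXIST if a PM of the same name is already loaded. */
	return mptcp_register_path_manager(&example_pm);
}

static void __exit example_pm_unregister(void)
{
	mptcp_unregister_path_manager(&example_pm);
}

module_init(example_pm_register);
module_exit(example_pm_unregister);
MODULE_LICENSE("GPL");

Building it as a module named mptcp_example would also let mptcp_set_default_path_manager() auto-load it through the request_module("mptcp_%s", ...) call above.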
  16471. diff -Nur linux-3.14.45.orig/net/mptcp/mptcp_wvegas.c linux-3.14.45/net/mptcp/mptcp_wvegas.c
  16472. --- linux-3.14.45.orig/net/mptcp/mptcp_wvegas.c 1970-01-01 01:00:00.000000000 +0100
  16473. +++ linux-3.14.45/net/mptcp/mptcp_wvegas.c 2015-06-24 14:15:48.931862523 +0200
  16474. @@ -0,0 +1,270 @@
  16475. +/*
  16476. + * MPTCP implementation - WEIGHTED VEGAS
  16477. + *
  16478. + * Algorithm design:
  16479. + * Yu Cao <cyAnalyst@126.com>
  16480. + * Mingwei Xu <xmw@csnet1.cs.tsinghua.edu.cn>
  16481. + * Xiaoming Fu <fu@cs.uni-goettingen.de>
  16482. + *
  16483. + * Implementation:
  16484. + * Yu Cao <cyAnalyst@126.com>
  16485. + * Enhuan Dong <deh13@mails.tsinghua.edu.cn>
  16486. + *
  16487. + * Ported to the official MPTCP-kernel:
  16488. + * Christoph Paasch <christoph.paasch@uclouvain.be>
  16489. + *
  16490. + * This program is free software; you can redistribute it and/or
  16491. + * modify it under the terms of the GNU General Public License
  16492. + * as published by the Free Software Foundation; either version
  16493. + * 2 of the License, or (at your option) any later version.
  16494. + */
  16495. +
  16496. +#include <linux/skbuff.h>
  16497. +#include <net/tcp.h>
  16498. +#include <net/mptcp.h>
  16499. +#include <linux/module.h>
  16500. +#include <linux/tcp.h>
  16501. +
  16502. +static int initial_alpha = 2;
  16503. +static int total_alpha = 10;
  16504. +static int gamma = 1;
  16505. +
  16506. +module_param(initial_alpha, int, 0644);
  16507. +MODULE_PARM_DESC(initial_alpha, "initial alpha for all subflows");
  16508. +module_param(total_alpha, int, 0644);
  16509. +MODULE_PARM_DESC(total_alpha, "total alpha for all subflows");
  16510. +module_param(gamma, int, 0644);
  16511. +MODULE_PARM_DESC(gamma, "limit on increase (scale by 2)");
  16512. +
  16513. +#define MPTCP_WVEGAS_SCALE 16
  16514. +
  16515. +/* wVegas variables */
  16516. +struct wvegas {
  16517. + u32 beg_snd_nxt; /* right edge during last RTT */
  16518. + u8 doing_wvegas_now; /* if true, do wvegas for this RTT */
  16519. +
  16520. + u16 cnt_rtt; /* # of RTTs measured within last RTT */
  16521. + u32 sampled_rtt; /* cumulative RTTs measured within last RTT (in usec) */
  16522. + u32 base_rtt; /* the min of all wVegas RTT measurements seen (in usec) */
  16523. +
  16524. + u64 instant_rate; /* cwnd / srtt_us, unit: pkts/us * 2^16 */
  16525. + u64 weight; /* the ratio of subflow's rate to the total rate, * 2^16 */
  16526. + int alpha; /* alpha for each subflow */
  16527. +
  16528. + u32 queue_delay; /* queue delay */
  16529. +};
  16530. +
  16531. +
  16532. +static inline u64 mptcp_wvegas_scale(u32 val, int scale)
  16533. +{
  16534. + return (u64) val << scale;
  16535. +}
  16536. +
  16537. +static void wvegas_enable(struct sock *sk)
  16538. +{
  16539. + const struct tcp_sock *tp = tcp_sk(sk);
  16540. + struct wvegas *wvegas = inet_csk_ca(sk);
  16541. +
  16542. + wvegas->doing_wvegas_now = 1;
  16543. +
  16544. + wvegas->beg_snd_nxt = tp->snd_nxt;
  16545. +
  16546. + wvegas->cnt_rtt = 0;
  16547. + wvegas->sampled_rtt = 0;
  16548. +
  16549. + wvegas->instant_rate = 0;
  16550. + wvegas->alpha = initial_alpha;
  16551. + wvegas->weight = mptcp_wvegas_scale(1, MPTCP_WVEGAS_SCALE);
  16552. +
  16553. + wvegas->queue_delay = 0;
  16554. +}
  16555. +
  16556. +static inline void wvegas_disable(struct sock *sk)
  16557. +{
  16558. + struct wvegas *wvegas = inet_csk_ca(sk);
  16559. +
  16560. + wvegas->doing_wvegas_now = 0;
  16561. +}
  16562. +
  16563. +static void mptcp_wvegas_init(struct sock *sk)
  16564. +{
  16565. + struct wvegas *wvegas = inet_csk_ca(sk);
  16566. +
  16567. + wvegas->base_rtt = 0x7fffffff;
  16568. + wvegas_enable(sk);
  16569. +}
  16570. +
  16571. +static inline u64 mptcp_wvegas_rate(u32 cwnd, u32 rtt_us)
  16572. +{
  16573. + return div_u64(mptcp_wvegas_scale(cwnd, MPTCP_WVEGAS_SCALE), rtt_us);
  16574. +}
  16575. +
  16576. +static void mptcp_wvegas_pkts_acked(struct sock *sk, u32 cnt, s32 rtt_us)
  16577. +{
  16578. + struct wvegas *wvegas = inet_csk_ca(sk);
  16579. + u32 vrtt;
  16580. +
  16581. + if (rtt_us < 0)
  16582. + return;
  16583. +
  16584. + vrtt = rtt_us + 1;
  16585. +
  16586. + if (vrtt < wvegas->base_rtt)
  16587. + wvegas->base_rtt = vrtt;
  16588. +
  16589. + wvegas->sampled_rtt += vrtt;
  16590. + wvegas->cnt_rtt++;
  16591. +}
  16592. +
  16593. +static void mptcp_wvegas_state(struct sock *sk, u8 ca_state)
  16594. +{
  16595. + if (ca_state == TCP_CA_Open)
  16596. + wvegas_enable(sk);
  16597. + else
  16598. + wvegas_disable(sk);
  16599. +}
  16600. +
  16601. +static void mptcp_wvegas_cwnd_event(struct sock *sk, enum tcp_ca_event event)
  16602. +{
  16603. + if (event == CA_EVENT_CWND_RESTART) {
  16604. + mptcp_wvegas_init(sk);
  16605. + } else if (event == CA_EVENT_LOSS) {
  16606. + struct wvegas *wvegas = inet_csk_ca(sk);
  16607. + wvegas->instant_rate = 0;
  16608. + }
  16609. +}
  16610. +
  16611. +static inline u32 mptcp_wvegas_ssthresh(struct tcp_sock *tp)
  16612. +{
  16613. + return min(tp->snd_ssthresh, tp->snd_cwnd - 1);
  16614. +}
  16615. +
  16616. +static u64 mptcp_wvegas_weight(struct mptcp_cb *mpcb, struct sock *sk)
  16617. +{
  16618. + u64 total_rate = 0;
  16619. + struct sock *sub_sk;
  16620. + struct wvegas *wvegas = inet_csk_ca(sk);
  16621. +
  16622. + if (!mpcb)
  16623. + return wvegas->weight;
  16624. +
  16625. +
  16626. + mptcp_for_each_sk(mpcb, sub_sk) {
  16627. + struct wvegas *sub_wvegas = inet_csk_ca(sub_sk);
  16628. +
  16629. + /* sampled_rtt is initialized to 0 */
  16630. + if (mptcp_sk_can_send(sub_sk) && (sub_wvegas->sampled_rtt > 0))
  16631. + total_rate += sub_wvegas->instant_rate;
  16632. + }
  16633. +
  16634. + if (total_rate && wvegas->instant_rate)
  16635. + return div64_u64(mptcp_wvegas_scale(wvegas->instant_rate, MPTCP_WVEGAS_SCALE), total_rate);
  16636. + else
  16637. + return wvegas->weight;
  16638. +}
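/*
 * Worked example for the weight computation above (illustrative numbers):
 * with two sendable subflows whose instant_rate values are 300 and 100
 * (both already carry the 2^16 scaling, which cancels in the division),
 * the faster subflow gets
 *
 *	weight = (300 << 16) / (300 + 100) = 49152	(0.75 << 16)
 *
 * and mptcp_wvegas_cong_avoid() below turns that into
 *
 *	alpha = max(2, (49152 * total_alpha) >> 16) = 7	(total_alpha = 10)
 *
 * so the faster subflow receives the larger share of the aggregate alpha.
 */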
  16639. +
  16640. +static void mptcp_wvegas_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight)
  16641. +{
  16642. + struct tcp_sock *tp = tcp_sk(sk);
  16643. + struct wvegas *wvegas = inet_csk_ca(sk);
  16644. +
  16645. + if (!wvegas->doing_wvegas_now) {
  16646. + tcp_reno_cong_avoid(sk, ack, acked, in_flight);
  16647. + return;
  16648. + }
  16649. +
  16650. + if (after(ack, wvegas->beg_snd_nxt)) {
  16651. + wvegas->beg_snd_nxt = tp->snd_nxt;
  16652. +
  16653. + if (wvegas->cnt_rtt <= 2) {
  16654. + tcp_reno_cong_avoid(sk, ack, acked, in_flight);
  16655. + } else {
  16656. + u32 rtt, diff, q_delay;
  16657. + u64 target_cwnd;
  16658. +
  16659. + rtt = wvegas->sampled_rtt / wvegas->cnt_rtt;
  16660. + target_cwnd = div_u64(((u64)tp->snd_cwnd * wvegas->base_rtt), rtt);
  16661. +
  16662. + diff = div_u64((u64)tp->snd_cwnd * (rtt - wvegas->base_rtt), rtt);
  16663. +
  16664. + if (diff > gamma && tp->snd_cwnd <= tp->snd_ssthresh) {
  16665. + tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1);
  16666. + tp->snd_ssthresh = mptcp_wvegas_ssthresh(tp);
  16667. +
  16668. + } else if (tp->snd_cwnd <= tp->snd_ssthresh) {
  16669. + tcp_slow_start(tp, acked);
  16670. + } else {
  16671. + if (diff >= wvegas->alpha) {
  16672. + wvegas->instant_rate = mptcp_wvegas_rate(tp->snd_cwnd, rtt);
  16673. + wvegas->weight = mptcp_wvegas_weight(tp->mpcb, sk);
  16674. + wvegas->alpha = max(2U, (u32)((wvegas->weight * total_alpha) >> MPTCP_WVEGAS_SCALE));
  16675. + }
  16676. + if (diff > wvegas->alpha) {
  16677. + tp->snd_cwnd--;
  16678. + tp->snd_ssthresh = mptcp_wvegas_ssthresh(tp);
  16679. + } else if (diff < wvegas->alpha) {
  16680. + tp->snd_cwnd++;
  16681. + }
  16682. +
  16683. + /* Try to drain link queue if needed */
  16684. + q_delay = rtt - wvegas->base_rtt;
  16685. + if ((wvegas->queue_delay == 0) || (wvegas->queue_delay > q_delay))
  16686. + wvegas->queue_delay = q_delay;
  16687. +
  16688. + if (q_delay >= 2 * wvegas->queue_delay) {
  16689. + u32 backoff_factor = div_u64(mptcp_wvegas_scale(wvegas->base_rtt, MPTCP_WVEGAS_SCALE), 2 * rtt);
  16690. + tp->snd_cwnd = ((u64)tp->snd_cwnd * backoff_factor) >> MPTCP_WVEGAS_SCALE;
  16691. + wvegas->queue_delay = 0;
  16692. + }
  16693. + }
  16694. +
  16695. + if (tp->snd_cwnd < 2)
  16696. + tp->snd_cwnd = 2;
  16697. + else if (tp->snd_cwnd > tp->snd_cwnd_clamp)
  16698. + tp->snd_cwnd = tp->snd_cwnd_clamp;
  16699. +
  16700. + tp->snd_ssthresh = tcp_current_ssthresh(sk);
  16701. + }
  16702. +
  16703. + wvegas->cnt_rtt = 0;
  16704. + wvegas->sampled_rtt = 0;
  16705. + }
  16706. + /* Use normal slow start */
  16707. + else if (tp->snd_cwnd <= tp->snd_ssthresh)
  16708. + tcp_slow_start(tp, acked);
  16709. +}
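/*
 * Worked example for the congestion-avoidance step above (illustrative
 * numbers): cwnd = 20, base_rtt = 100000 us, measured rtt = 125000 us:
 *
 *	target_cwnd = 20 * 100000 / 125000 = 16
 *	diff        = 20 * (125000 - 100000) / 125000 = 4
 *
 * Outside slow start, diff is compared against the per-subflow alpha:
 * with alpha = 7 the window grows by one, with alpha = 2 it shrinks by one.
 * If the queueing delay (25000 us here) reaches twice the recorded
 * queue_delay, the window is additionally scaled by base_rtt / (2 * rtt):
 *
 *	backoff_factor = (100000 << 16) / 250000 = 26214	(~0.4 << 16)
 *	snd_cwnd       = (20 * 26214) >> 16 = 7
 */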
  16710. +
  16711. +
  16712. +static struct tcp_congestion_ops mptcp_wvegas __read_mostly = {
  16713. + .flags = TCP_CONG_RTT_STAMP,
  16714. + .init = mptcp_wvegas_init,
  16715. + .ssthresh = tcp_reno_ssthresh,
  16716. + .cong_avoid = mptcp_wvegas_cong_avoid,
  16717. + .min_cwnd = tcp_reno_min_cwnd,
  16718. + .pkts_acked = mptcp_wvegas_pkts_acked,
  16719. + .set_state = mptcp_wvegas_state,
  16720. + .cwnd_event = mptcp_wvegas_cwnd_event,
  16721. +
  16722. + .owner = THIS_MODULE,
  16723. + .name = "wvegas",
  16724. +};
  16725. +
  16726. +static int __init mptcp_wvegas_register(void)
  16727. +{
  16728. + BUILD_BUG_ON(sizeof(struct wvegas) > ICSK_CA_PRIV_SIZE);
  16729. + tcp_register_congestion_control(&mptcp_wvegas);
  16730. + return 0;
  16731. +}
  16732. +
  16733. +static void __exit mptcp_wvegas_unregister(void)
  16734. +{
  16735. + tcp_unregister_congestion_control(&mptcp_wvegas);
  16736. +}
  16737. +
  16738. +module_init(mptcp_wvegas_register);
  16739. +module_exit(mptcp_wvegas_unregister);
  16740. +
  16741. +MODULE_AUTHOR("Yu Cao, Enhuan Dong");
  16742. +MODULE_LICENSE("GPL");
  16743. +MODULE_DESCRIPTION("MPTCP wVegas");
  16744. +MODULE_VERSION("0.1");
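Once loaded, the module registers "wvegas" like any other TCP congestion-control algorithm, so it can be selected system-wide via the net.ipv4.tcp_congestion_control sysctl or per socket with the standard TCP_CONGESTION socket option. A minimal userspace sketch (the helper name is made up):

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <string.h>
#include <sys/socket.h>

/* Ask the kernel to run wVegas on this (MP)TCP socket; returns -1 with
 * errno set (e.g. ENOENT) if the algorithm is not available. */
static int use_wvegas(int fd)
{
	static const char name[] = "wvegas";

	return setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION, name, strlen(name));
}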