From f4b68d1ab0f3ae106edfb09d9f5c304fb70b995e Mon Sep 17 00:00:00 2001 From: Audrey Dutcher Date: Mon, 17 Oct 2022 20:09:38 -0700 Subject: [PATCH] data can now be generated. I think. --- typetapper/__init__.py | 0 .../__pycache__/__init__.cpython-310.pyc | Bin 0 -> 149 bytes .../__pycache__/analysis.cpython-310.pyc | Bin 0 -> 2345 bytes typetapper/__pycache__/data.cpython-310.pyc | Bin 0 -> 10238 bytes typetapper/__pycache__/engine.cpython-310.pyc | Bin 0 -> 5910 bytes .../__pycache__/knowledge.cpython-310.pyc | Bin 0 -> 772 bytes typetapper/analysis.py | 64 +++++ typetapper/data.py | 248 ++++++++++++++++++ typetapper/engine.py | 135 ++++++++++ typetapper/knowledge.py | 12 + 10 files changed, 459 insertions(+) create mode 100644 typetapper/__init__.py create mode 100644 typetapper/__pycache__/__init__.cpython-310.pyc create mode 100644 typetapper/__pycache__/analysis.cpython-310.pyc create mode 100644 typetapper/__pycache__/data.cpython-310.pyc create mode 100644 typetapper/__pycache__/engine.cpython-310.pyc create mode 100644 typetapper/__pycache__/knowledge.cpython-310.pyc create mode 100644 typetapper/analysis.py create mode 100644 typetapper/data.py create mode 100644 typetapper/engine.py create mode 100644 typetapper/knowledge.py diff --git a/typetapper/__init__.py b/typetapper/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/typetapper/__pycache__/__init__.cpython-310.pyc b/typetapper/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7a5001f6b9f51154ca705d30ad038776f8d3ea72 GIT binary patch literal 149 zcmd1j<>g`kg0~9p$sqbMh(HF6K#l_t7qb9~6oz01O-8?!3`HPe1o6v4KO;XkRX?#b zr6{#hzn~~TOFuC$y-2^LvLLl2v7jIog&7~8nU`4-AFo$Xd5gm)H$SB`C)EyQVlfkt HU||3N3BMv| literal 0 HcmV?d00001 diff --git a/typetapper/__pycache__/analysis.cpython-310.pyc b/typetapper/__pycache__/analysis.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5fe36f27ec376ac02872143794c9ce1ebbbabfbb GIT binary patch literal 2345 zcmahKO>Y}TbY}L$>)o|++9Yj?L<3Yv=8z!4fkRZ4ex&8lAknH)Euz)dGj_e+FVC*i z##&1S=gJjvbdDSl2mZ|*;nXAIhB)xvY`$7mVpsF#&6_v#zTaf6mP_FK?e)X{AAtRd zi;JHLi(4>?0RT=ojY&d#lp@Yz)?+XmF-we|p=oApCRWeVxE0$;qt^i3=8f1%n!TpR zo!Cvho=3@j!kgSZBHR@$@)t(0#n~g$^(OuQ@Q7tE2BRc_8Hu8`XP>2-+@s)08r721f(Jn zU9!?R#+6xGu(n~C3kK^3U?g&iYh{zu1^oedmhLg3aBAt{%n1l)d$V=$!e%lp%FSnD zGtFcY#-o|wU0<02D5BW-nOc!zG8IJ`jMMBzEI8QifEvmPlG+opfy+iNvqT#Na4tTai6HEkqXQ8P;_QW5JEP`dQJj*yOqqp<2NHAa7b4eT z`sd5-=UF1Q!zq{Ia2w@rhv`smmk_s7$L%7jg%zV>D?bFCgHbvvgTRM-iZ%e7wrPht z%z@$4+2!+4Y@PD%oJzcQy4jf^>~AgiZDb|yL~ORHj$ zeGEB2v9;^zUbi7VIIXOq4D;v8N+AIYWj(|6lpWmZJP%VIi{L=K3^Z98fOZYJhPx&n zknaPkeNuxfxV#JikpgY(Fq=A*fr;k7v+Ea)dq)i3Fch7Gan^=L;DVAY2$%!Wh z5pp<@7x|UBL6$9)v4ApKusP=UTRZ!M@7Ua)H>w6VHV8MzOg@@CsO+e%UH??$@6DYH zQk#_#b?STfcj2@<_bP9>QaKf?Z>g+fbM9BZzF{^QR?XhI8fk*a-wOH zk4Gt2-q+DM*bT?xTcI1F8FDUxgKd4L%p5wTvcj^I#ar2=nma?0ikG>(*+uD?`s~#W zGW+~pxpc8|w{{?v!??J)b#{Zi4yWZNfL&;*W*in}_o~DOCo$WUtJ54RbdV`?EDj6E za`CcM=BN}>IZ_-5S%9WSp5-UilRy>3${&gn+Q(s_i(&&W{1`8^;9yeJRq`Pc>`|Ie zOSrcvMXnmzwA4!j&Z}k-XJwFvi2$((?S7Udqf&WE22HBWps(?6yDmQqTH;ikYK*>A zZ4_QtDq0{vK!N@MTrN>NV9`p+8%S@W%QaE8HL6Y5mb+TVAS|;)o#2|@uIY#esd%Bh zT6@j-R@CGw$UtWow*c5s{Vp}xDnK9F-AD8q%nVkH+JId$R!@3_1FKcwbYaf{PWFoZ z?f*5~{J&U3H^bJ7UJC-{1_6X+3au+peh^HiVO;M>%pQ3a0X8YEqcO+7~ZRPc|IEII2l%<7E9%W+pmslZc1<|d#<%?nq-Y_jxUDNcl7c9u* zDZK18gM|m}`q8T2vgR4%$y4_gK3MREXdT|AvT}B6n{VP{QD6i23BD-$hk2d-2Um(@ ARsaA1 literal 0 HcmV?d00001 diff --git a/typetapper/__pycache__/data.cpython-310.pyc b/typetapper/__pycache__/data.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ffcd8260d792fd9aef2ece7efee7187380ee5b4a GIT binary patch literal 10238 zcmai4OOPB_R?S~kWqtaqmb&$^BEXzu&5UaMUn<%9rGm!pk~}|BhuC%1~y*XqqLn zX_c&|U9y`_$!Vrasb;#AHi@?ynWkHErEWK}&0Hzh%$M@=?lcO`nbJ&iwlpj6sm5Gs z4r5Y{`O>^MqtYsK&wSlbuFBpuRCdSOF-r@H_MFP2JumHxiS~k;LHmrixMO3)z654g z%>gqvz$_&&^J)Q@g#l)N0<)<00kdy_Igr3Csr|t0m-Uwu?FZB{+Lyg$S$`#gSy2ap zIXJ)^OkfVF!@wLKU=AfPN7PfmJT<@^PGF9zW567f^^d6Ip6x9kG-{T5TAjG-l%9g* zPO4Mramvf>*rlW13|daBXVCJDj5(H=>sfUM?PsL@c%r?i&Z7OSv_G9_e@>l4`#EVp zk!b&lT1ES+cS3kSnZTS^&ja)P0CP&Hc|pC1wijiV(<*h%D8BRswXK*AQ`ftlh8Nlw zTX#a|&AK0i_BAgE-K(9T-fmSIp>w%j4IY|dUU{`jw-G437Ol{EwbgBgPNf^P@s?GU zpi*sAd_PRp>Rv-_TrZkox~AJbuT|wz$;ZaaI?5wxzTp`qQyC@eR}Bc(QRZ6oYpof| z2BA)vxztwPo9$}oe)wUdTv1Agt{*gma$SAA@h}y-<#Mai^vdNhS1vc(s@q__P%dwE zD~$ohuD614u3WCPTJ4}h&ir!up`j0g00IB^^7)(Xrgy&5Roc6AzN6cl=PRun`h0Mw z;{}yY#~Z7X-PO(=J&*A`$dA%eajk+WZF{op@4<2ktdCfBE(YypXg9#Lk@PhuC%mV2+i;@!OtcY4(Gx3-5S0paY&30vt*&%=(RXAR>me zC&&nGKhO~wyHRiqvkKip0pIqd;1Ri+)&*rj2kbESme*|SJ3>{)YpIs!`y2WMhV07u zX&^|?BL^U76fDh_J=fN!fW<1A>&;Fi`Jma^(EHH3>-1-Uc#6np3HbKJsk4`$a)C^q zx?O2>y~j*{785=_oStucPVXnBrc%mMHcXPO97ru4T30(^T6;lPx2h(li67QkE}{4o zQDA)Ad}waq$nM){2UnV~2u-gQc4?e+PUU!brCms zBHq=|Zg&t@w1bMqu|5*W%X+QmhSC z;bA&-8t{4g7cn+WdmjhzFX573hZU^X?v&e|vLBN}eF#$lBc`IQU%;DQXOoQ{2~{}6 zwhW>TrOM@B!He$#?Bu@d^VNFYU>eqlAkeG`|+?OCR?u9^LAP2vS0mFx+QZs~9D(DSk+w2>cjrUz_fqCEP zThSJ5IRV~f6YRiYZMX@=l;-})9_Y)gQPi4R3^sm=b#f4z{v;D=*z#&6cpq4w2i>t; zb2egPA;Cn{YfQwnd?^$z>@!vhjKEZ;vhEt7)7ZqN<}%EGL}k(57#mCtB;!5{jrX_U zFW%39aWMSAco+V4%Tlp9Et=x+5GY%#&xhv6lT2jSZg?&44G_%iRh$)jO`J{$RZoOg6&o&_lGBG$Gm0e@AZwodC=Il0<&)g){do*KvivJ z-eS$T4;AGp+k(<6Ygta9j62J5Y&mGOaCOOOg!@8W4 zBcpj`Tng(}Wn7vyjLKkC1~gPhG>mFn5w-05iHzd-PZFa(8TB32A|77h!5s4MyVOq> zD7WTm_=2G5;^N`8!d%_2BTA~!3=adJ(C)N5k+&N9AgD!jHtV5X?ds5mCQm519O;{w zTnw}$hH%NunU;CnT*BY1)N+HC=-0KD{!7MHF>dT3mFp<}QGnPDdG?M`as;LXrWGPL zoH+L~?rOhWZ>f!9W>P!zm>{&T;BJ_Dt+hlWZ*pB)bJs|l zX%MX0Iz?gzjq51>j{#y5dBctDi?Z*TAU5Tty^Q!wS2?`*GJ?lNo!Bg}nL!!!V$VVE zwA_+&(lhTByczVBnc#n2l@b3NdD~fd;=C%rzs@05Gmo@I_va2%LKda_y<6I zMq~w`_re$x2}?X+Yq~{Ke2<-31XM73$DT0vlV{R&9bq1UOt%_{Aj3kpRj<|I9hvVC zi#`@PaUZFul&$|g0K`hhee(fb_sln~ZEM?x8-)8#!P52ZS0LLpV=IMT)}Yr(^m4XS z{nX(75Z=?n8oTlm?Mo6&E^hfJF|6sYlY61mT}XjT^xDMlvbcBOw|Lg z>FXP)YZ{R--R(e8yka&Hf;qQv#6$CTm~P88FSKsIAQ3exFQUAZ*apUDdJ}+5`-=dJ zFhYywJpPWD)|b|oi*_$BI})ulCN^pqi-?UTC@^;5L=bGk%##7BiRkl4=vgh3Rs+t;^-#Ll?0&v@|&r7wm|t2rDjrjI=cRvyeRhX8-~AlSDBP z+qAH+&EG;K{5hgvc#Sjg8mar~2Z&X~f8b5QAAW9Zx@$&|4RY{XRyl*+$w$4mTp3%y z*ozz+_vWsm-)OeU5i=N}gIVb@_W=G7nlQVti=c(ha@{69H=xCqr_7gf_W(Q1ke=h*C)W&dNJmS4`J}ZqAPL z%M8#uCl)FeCw=b>Q9R!fH}A2wOdzQc9(G=<0y*)T+ZTnAJhoaBFd)=D0^wl<$cQX@ z7nMP{c+^@okD56vmt9N?t_s}PLu;{PK+(q;2%>!s;<;aoG4pvDze>K5E^7Hu|u z;f%d3+^eHfa$C5@|ZMy|g6w!-Uf0j)V)fYDdwk+mn*9bSfK)vPxsIJT$NoSKK@ zo>vEPVMbVhF_tH55610NOLtSH1+^dR9Pk!5_u-PgtX42)NiE$g2dKw-vj zcXieCBh8@Q8Z#Yqiw=rdH9Fcp?r{n(VFy|G$W2nh2&n98!;jQ(T;!qK@;)lJx{bzP zqoW*xg4J7y30RG>_-KIa_Vh)5jDiC~0fW^Y#5^|;X2XDuXn|_U_LB|NKf#awlwdcK zxZK|WDI^`3BuR*2axKhM@u30Nt19IXKde*xL@vA~_IVM2`J!YH7YTKJn4+PaK+jq9 z69N$%3vHY~lG(Reo1ijF(MV46Z-Esuk07%)COJWi-Ko-^#SiNwk-riT2MAbTCt)dJ zCy~JvJ4yd51WsOKIgAcNOe2MPgQhVbn?`stxPu2~&HBNZQM`hlu~GaK^(eg{Rzz46 zQ<7U_T1Bkl-=mMPb=2sc9uf|J(B$s&{SpT*0YqsQ>Ka@EQ%14&V^@a+VONEgbqt7hRjlpAu4WK@wOgM02hL69 zUNU?0L!t)AFdbC%4SaA*-O!cJ&9S|u+HrG)xKpTLKN*|RSK8YSydZ9yP(6G!yJzE` zYRg^DLKoXkmf-e%2PKyBw2*)@1j+IMwIRA>aM9FWL28Ydhk!^E)idE-SV&ucTd@LMX{CTO;-y)bN*daJaKt0pn0ibOO zK5FY*ABUOC_17sX&EPF@l7@;W`}9{p`cy+^3Yd4Zta;P2mr#zF2khhaN~B;_bQybc zcw0yD=~>6!fWkR|*~cACZeI}l;2q{)IrxCbk9cY1KXDiH5bpAWdJX0tpU;u?R0S`) znZq3{4cwy&{_=LE)#(OTTDA6Sm_??i;UoI>!-YX!r#xzMI@(kCGSjW% z)97Zm*^vm?jXQ{e?H!K4F&%5(>K&Mp*(!%bXD1!`)6h{KtQ14Xx3Q6-?vwBKCD~Vr^dZ zDLl3yFT>jpY&5b*$YTKS+;gIvP%$-~c;sT&P#)1@iN#KDGp&PL`k0*#`3Wk}?FNz? zA6=~S8u)g*^U#W-Stju(M56Da?Wdes?3K|wH!Xqaf@|kJ$4G)3VRc*iWYA+n&?dE;x2Ik1UPjI!7UM;*5=?{sH>_ zFZ)xC9HX~5oEj%9%9801feJJ2ZUBei^J_bQH;Pj^cDYIYIXdeM!ASzqI{IALC;Y>X z{tGsWrQA&i{tVdvL6NhNGmctEP0hCAGl);_bn5s=7}BY20_sV0Arp1ynsDdsM#CE% zG~T2_$HV^zX?jyFS=PLy_V^z{xPnlBwUB+AweJ&%>p#Mp(0-b=GX!S|&Jny!@HK+3 z6RZ*N)P|C o>#!_F#l|m7+?GZ?gTmkZyt#O2e%VT)|8ppZ2W2qEa+b{h2lzLTdH?_b literal 0 HcmV?d00001 diff --git a/typetapper/__pycache__/engine.cpython-310.pyc b/typetapper/__pycache__/engine.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0431f5937e2bfc95bf82ff78b1e95e2e5be03b7f GIT binary patch literal 5910 zcmbVQ-ESMm5x>1}QWQM17D-?+}$-+3ik}|aevp~zF=W791BZ3*2*|%OGd5m0@97e7Yww<6nh(FVtKPGmh0WF zkPquoeND&)y&hcDws7CZos>aRqt7y9k8w6{K%}|&!pCUMR8DtuLp)dGYa$I&$PAL! zmcRmLy18%20+@cfvG6qRh=qEOOR>4om2tRGkJjV@v8P&v<4qB*wIXq?yQ$8ekQuCQ zrXm@Gz#mPJGLzJ9;68DYN$E;1;TwcOW?V79glUBp+T0XQ)BetAGH!8ux3mHy3V+TJ z1@7E5M3KAP+ckFB${5-{FX(m&?IIu3?Qve><1lxbm-!hy&+v+-(;oT&E6)RWf49q>xU&F{0e_c$6GvO>o-ryH7 zGIuaCkC8X|TNrueVB{P|F7itld6m}Zm$l?8yoQnUx@YpY`8(*nMj9Qwzsldm$m`JR z8ef1`Z+vIi#>xfk=5r0$5{*=u>)i&q!r&qU(@oY-&V!`JjuEnqHCfBZ ztRL9l7&9_6GXj&tRrc9p%~DR%gQqD!2wG7q4FY)v6Kl4z6VYx;axguj-1TR5xt8oR zRS1GcyPhQAB`1;;NU{n-GZRu^k`>w3+_2);21_VA?R1k7ucn#Wt4UUBWKcbXvZ2M< z_onu2IJLjJ+79ZRL*pdvq(O^6hqB)7n2QH-L)k5vtSXxrP2atd{Iy{wC1pt`QmoIcySe9wvO#=#_|;~%X36d(M1(BCo$4n ztx?6?njg{VkgY*l3@&OrxV2WZXlNwDo~2gCLVE|O6gqp>Ks#6Y4_X`IPCc!wLL{CA zjW|kD8_kGOFw8law1bn~6wI1Uy9^?Z6*M;@?3KHRcZ6wo?Rsh*oO#n5?9B-^pzH z66}Z@dZABmGrMWd8QV@i)-U8^d*-&Ax!mX(@_JhAk8!pK&^Seci@9D}!tC+P&YaBK zvA2En%Hf%QCG)reNrkKcNlXS=A)LtkaAMt*e*{l8Jew6VKb_n`>iphfMpP;oQ8AqY z_5=ODr>HN7)0uyhR-Vax(!J&^{^b2WzmUbOxSME;0~894db^#pkc8BP6dOV&A~+nX z&1&b87hxZH2}D_~C_T2BQLo+BKnL8Aji>6Y6l<*{6;db6AgXsrE@iycIs^~iD%Mtx zkTRC4^yFa1C*&98zmgnMd2ui8_R>UIn5}G*q~l-t0H{FlHKB@+x=Z5Qo`@PkO$@}` zk1*>#!cw^{Y9_Jj%NR31dinYPz zp+SA^!M{c0z=IulFwhNtW$jq-NY=Mfco{Io*)!qIF5bF(COp&Y`eF#;9;x9ER=nqY>p97VA zAH3Qa`%aFfg=Q~mXm{4$L*BlYD4RxyJ>nGTx_uilc86q^Kwb*C^c1*ob6NI;@>g`0F*nj$%!w;gs$;=R?N@%Q`iT{Q1SC*NmZY>(!Lk%!UvbVsgyk69bBf)ES{(Uxb z1(!>BC6|c|ZOtiOet-GyJ#gxprGlM#DL=*}`53cKATgC(v^jZMfC3>kQr1TX*<-+C zox6wl>wryv&#>s8^Qtr&5=CM)a11+zj50nlq*kgh0!HnE0 zn#%@y=kIOAh@)kEuF;#x&k;LrW=|S3z-hgTm&>N;(VX9;KzicXVUd&4Tdaf#vw+-w z-UDwgwGCcG-vJ*qJm3S0BmN4|+uO6Yi&=5WNDKWUWU#zfa~G+Y{Dkw5lmmMGhJJkT zzJA>JP*N2^980}baDWmA9>jsijSpY^8<(V-t2UM+xr??c5g^B1z;T*x>ZsJrwD;eU1*qbSMO>b-@w$hNO?eM#QuAO#`QdX4 z0XAP(t`teHoeulkS~;$Bvb~Ny85;OrB^htk_+1ARrK(*i~q9Ct&1-K6Zk-z zs*91LhU`QR`QxBC15yA;0i8AqGdSiEX1GTd$pLgM|0sQM)K#Q%(oGRMdntKaG7VDJ z#m=NIfJ%tYD(=cJri+1?6B|d_Zs5)deNBGSMQ8%$L#otu&d3<*9t*gNZOYh9rIIMGWfYhhHE@UgW9mngvu0E^CLyH|`O!+ogB-u&%IKP})(44FU z^cQ_CvEmkHCG;KV0A1BzJjfhPJdyrM02xGk@hvoRW57#IUHHKmy6{6E2?tacfBMLx zeT+)M!`_imJoy=9*}2EPN&kxIU=akW7zCY|_iz#qR4E9)?bX`{Z*2OHsJzGduPRBKtp5=y z`vJ~Y@)`Q-Oc2zgC{F7s>NxyPQa=7{;rNSut|R;?xalk+-yw2=$Xy~2h&&2kSohS5y?Q*_`&b3hG@43zqktYc8ZAp*)@(9 z!Blb$q-e~tl5LtrW?K2}TfSAc$}>f`YE|v(EGVAk@v2jGB+adrmDi5f@Hcf${ssM9 zZykt*^(cPUrlX;@nJS>nc>q=%G`ERUBjFfYbS$CYu{d1PUta&KyWc!)J#R&Nfn(0A fRVS^Le>2~!kExbQDj>e$qxkU65-Xa%IdA<3_s@NL literal 0 HcmV?d00001 diff --git a/typetapper/__pycache__/knowledge.cpython-310.pyc b/typetapper/__pycache__/knowledge.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..625623ac8fdbceb2de6d3cb0c93fbad033f475e1 GIT binary patch literal 772 zcmZWnOK%e~5VpPEN0LoZRZbP+zL(~Vgiuu~RR}dgqNlxB?%GK=@jmc6l%U?)9|A}I zlCK<4k6bx1vms~^vzpm?&o?7C)9IK1{rGuVe58bYM`K7Fj8oul4nh${4XJ2ODeBCy zD#(Lg4-C&a_yQB=q2ekuQ5EMgB^N|RDt;g;)_j$$Myv6CkWW^f`L(? zKfKzu`bxH~wwJP&OKm+{Ul$B2_k=p~YvArVh&y^m1ab5W`$|9y9CzV8`XWW3DV=Ar zjUmq`wRX3SU4QcNd3j-Fd*j0;{wzYYA?A#)aSu_#A{q0%(`L~Zek|QOODDt#8g&tfh>qz2{e1l2(kJ`W zc@|+^Eh{aA9}7`6YGY7Oh4`?M=1+tp2tR$hpLZrZZJ)r19`!&Rpbuv!z#YDuggm9a zCrf< 'Op': + raise NotImplementedError + +@dataclass(frozen=True) +class ConstOffsetOp(Op): + const: int + + def invert(self): + return ConstOffsetOp(-self.const) + +@dataclass(frozen=True) +class VarOffsetOp(Op): + var: Any + + def invert(self): + # TODO ???? + return self + +@dataclass(frozen=True) +class DerefOp(Op): + size: int + + def invert(self): + return RefOp(self.size) + +@dataclass(frozen=True) +class RefOp(Op): + size: int + + def invert(self): + return DerefOp(self.size) + +#@dataclass(frozen=True) +#class OtherOp(Op): +# def invert(self) -> 'Op': +# return self + +@dataclass(frozen=True) +class OpSequence: + ops: Tuple[Op, ...] = () + + def __add__(self, other: 'OpSequence') -> 'OpSequence': + seq = list(self.ops) + seq.extend(other.ops) + simplify_op_sequence(seq) + return OpSequence(tuple(seq)) + + def appended(self, *op: Op) -> 'OpSequence': + seq = list(self.ops) + seq.extend(op) + simplify_op_sequence(seq) + return OpSequence(tuple(seq)) + + @staticmethod + def concat(*sequences: 'OpSequence') -> 'OpSequence': + seq = [] + for s in sequences: + seq.extend(s.ops) + simplify_op_sequence(seq) + return OpSequence(tuple(seq)) + + def invert(self) -> 'OpSequence': + return OpSequence(tuple(x.invert() for x in reversed(self.ops))) + +def simplify_op_sequence(seq: List[Op]): + i = 0 + while i < len(seq): + cur = seq[i] + if isinstance(cur, ConstOffsetOp) and cur.const == 0: + seq.pop(i) + continue + nex = seq[i + 1] if i + 1 < len(seq) else None + if isinstance(cur, ConstOffsetOp) and isinstance(nex, ConstOffsetOp): + seq[i] = ConstOffsetOp(cur.const + nex.const) + seq.pop(i + 1) + continue + if isinstance(cur, RefOp) and isinstance(nex, DerefOp) and cur.size == nex.size: + seq.pop(i) + seq.pop(i) + continue + if isinstance(cur, DerefOp) and isinstance(nex, RefOp) and cur.size == nex.size: + seq.pop(i) + seq.pop(i) + continue + + i += 1 + +class DataKind(Enum): + Int = auto() + Float = auto() + Pointer = auto() + +@dataclass +class Prop: + self_data: defaultdict[DataKind, int] = field(default_factory=lambda: defaultdict(int)) + struct_data: defaultdict[int, defaultdict[int, defaultdict[DataKind, int]]] = field(default_factory=lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(int)))) + unifications: Set[Tuple[int, int]] = field(default_factory=set) + + def update(self, other: 'Prop'): + for kind, v in other.self_data.items(): + self.self_data[kind] += v + for offset, v1 in other.struct_data.items(): + for size, v2 in v1.items(): + for kind, v3 in v2.items(): + self.struct_data[offset][size][kind] += v3 + self.unifications.update(other.unifications) + + def __or__(self, other: 'Prop'): + result = Prop() + result.update(self) + result.update(other) + return result + + def transform(self, ops: OpSequence): + result = copy.deepcopy(self) + for op in ops.ops: + if isinstance(op, RefOp): + result.struct_data.clear() + result.struct_data[0][op.size] = result.self_data + result.self_data = defaultdict(int) + self.unifications.clear() + elif isinstance(op, DerefOp): + result.self_data = result.struct_data[0][op.size] + result.struct_data.clear() + self.unifications.clear() + elif isinstance(op, ConstOffsetOp): + items = list(result.struct_data.items()) + result.struct_data.clear() + for k, v in items: + result.struct_data[k + op.const] = v + result.self_data.clear() # TODO ??? + result.unifications = {(x + op.const, y + op.const) for x, y in result.unifications} + else: + result = Prop() + return result + +@dataclass(frozen=True) +class LiveData: + sources: List[Tuple[Atom, OpSequence]] + const: Optional[int] + size: int + + @classmethod + def new_null(cls, size: int): + return cls([], None, size) + + @classmethod + def new_atom(cls, atom: Atom) -> 'LiveData': + return cls([(atom, OpSequence())], None, atom.size) + + @classmethod + def new_const(cls, value: int, size: int, codeloc: CodeLoc) -> 'LiveData': + return cls([(ConstAtom(codeloc, size, value), OpSequence())], value, size) + + def appended(self, op: Op, size: int) -> 'LiveData': + return LiveData([(atom, seq.appended(op)) for atom, seq in self.sources], self.const, size) + + def unioned(self, other: 'LiveData', size: int, const: Optional[int]=None) -> 'LiveData': + return LiveData(self.sources + other.sources, const, size) + + def commit(self, target: Atom, graph: networkx.DiGraph): + for src, seq in self.sources: + graph.add_edge(src, target, ops=seq) + +@dataclass(frozen=True) +class RegisterInputInfo: + callsites: Tuple[int, ...] + # when we go back through a ret, we push the callsite onto this stack. we may then only go back through calls if + # they match the top of the stack, at which point they are popped off + reverse_callsites: Tuple[int, ...] + # when we go back through a call and there is nothing on the callstack, an entry is pushed onto this stack. + # not sure what this indicates yet + + def step(self, pred: int, succ: int, jumpkind: str, callsite: Optional[int]) -> 'Optional[RegisterInputInfo]': + if jumpkind == 'Ijk_Ret': + return RegisterInputInfo(callsites=self.callsites + (callsite,), reverse_callsites=self.reverse_callsites) + elif jumpkind == 'Ijk_Call': + if not self.callsites: + return RegisterInputInfo(callsites=(), reverse_callsites=self.reverse_callsites + (pred,)) + elif self.callsites[-1] == pred: + return RegisterInputInfo(callsites=self.callsites[:-1], reverse_callsites=self.reverse_callsites) + else: + return None + else: + return RegisterInputInfo(callsites=self.callsites, reverse_callsites=self.reverse_callsites) + + def commit(self, graph: networkx.DiGraph, source: RegisterAtom, dest: RegisterAtom): + actions: List[ControlFlowAction] = [ControlFlowActionPop(i) for i in self.callsites] + actions += [ControlFlowActionPush(i) for i in self.reverse_callsites] + graph.add_edge(source, dest, ops=OpSequence(), cf=actions) + + +@dataclass(frozen=True) +class ControlFlowAction: + pass + +@dataclass(frozen=True) +class ControlFlowActionPush(ControlFlowAction): + callsite: int + +@dataclass(frozen=True) +class ControlFlowActionPop(ControlFlowAction): + callsite: int + + +@dataclass +class BlockInfo: + outputs: Dict[str, RegisterAtom] = field(default_factory=lambda: {}) # slot names + inputs: Dict[RegisterAtom, RegisterInputInfo] = field(default_factory=dict) diff --git a/typetapper/engine.py b/typetapper/engine.py new file mode 100644 index 0000000..e9b4355 --- /dev/null +++ b/typetapper/engine.py @@ -0,0 +1,135 @@ +import logging + +import angr +import pyvex + +from .data import * +from .knowledge import TypeTapperManager + +l = logging.getLogger(__name__) + +def get_type_size_bytes(ty): + return pyvex.get_type_size(ty) // 8 + +class TypeTapperEngine(angr.engines.vex.VEXMixin): + def __init__(self, project: angr.Project, kp: TypeTapperManager, **kwargs): + super().__init__(project, **kwargs) + self.kp = kp + + tmps: List[TmpAtom] + + @property + def codeloc(self): + return CodeLoc(bbl_addr=self.irsb.addr, stmt_idx=self.stmt_idx) + + @property + def graph(self): + return self.kp.graph + + @property + def blockinfo(self): + return self.kp.block_info[self.irsb.addr] + + def _handle_vex_const(self, const): + return LiveData.new_const(const.value, get_type_size_bytes(const.type), self.codeloc) + + def _perform_vex_expr_RdTmp(self, tmp): + return LiveData.new_atom(self.tmps[tmp]) + + def _perform_vex_expr_Get(self, offset: LiveData, ty, **kwargs): + size = get_type_size_bytes(ty) + if type(offset.const) is not int: + return LiveData.new_null(size) + name = self.project.arch.register_size_names[(offset.const, size)] # unsafe + slot_info = self.project.arch.get_base_register(offset.const, size) + if slot_info is None: + l.error("???????") + return LiveData.new_null(size) + slot_name = self.project.arch.register_size_names[slot_info] + reg_atom = RegisterAtom(self.codeloc, size, name, slot_name) + + source = self.blockinfo.outputs.get(slot_name, None) + if source is not None: + if source.name == reg_atom.name: + self.graph.add_edge(source, reg_atom, ops=OpSequence()) + else: + pass # alias mismatch + else: + self.blockinfo.inputs[reg_atom] = RegisterInputInfo(callsites=(), reverse_callsites=()) + + return LiveData.new_atom(reg_atom) + + def _perform_vex_expr_Load(self, addr: LiveData, ty, endness, **kwargs): + size = get_type_size_bytes(ty) + mem_atom = MemoryAtom(self.codeloc, size, endness) + addr.appended(DerefOp(size), size).commit(mem_atom, self.graph) + return LiveData.new_atom(mem_atom) + + def _perform_vex_expr_CCall(self, func_name, ty, args, func=None): + return LiveData.new_null(get_type_size_bytes(ty)) + + def _perform_vex_expr_ITE(self, cond, ifTrue: LiveData, ifFalse: LiveData): + assert ifTrue.size == ifFalse.size + return ifTrue.unioned(ifFalse, ifTrue.size) + + def _perform_vex_expr_Op(self, op, args: List[LiveData]): + size = get_type_size_bytes(pyvex.get_op_retty(op)) + if op in ('Add8', 'Add16', 'Add32', 'Add64'): + sign = 1 + elif op in ('Sub8', 'Sub16', 'Sub32', 'Sub64'): + sign = -1 + else: + sign = None + if sign is not None: + assert size == args[0].size == args[1].size + addend0 = args[0].const + addend1 = args[1].const + if addend0 is not None and addend1 is not None: + const = addend0 + addend1 * sign + else: + const = None + input0 = args[0].appended(ConstOffsetOp(addend1 * sign) if addend1 is not None else VarOffsetOp(args[1]), size) + input1 = args[1].appended(ConstOffsetOp(addend0) if addend0 is not None else VarOffsetOp(args[0]), size) + result = input0.unioned(input1, size, const) + else: + result = LiveData.new_null(size) + + return result + + def _handle_vex_expr_GSPTR(self, expr: pyvex.expr.GSPTR): + return LiveData.new_null(get_type_size_bytes(expr.result_type(self.irsb.tyenv))) + + def _handle_vex_expr_VECRET(self, expr: pyvex.expr.VECRET): + return LiveData.new_null(get_type_size_bytes(expr.result_type(self.irsb.tyenv))) + + def _handle_vex_expr_Binder(self, expr: pyvex.expr.Binder): + return LiveData.new_null(get_type_size_bytes(expr.result_type(self.irsb.tyenv))) + + + def _perform_vex_stmt_Put(self, offset: LiveData, data: LiveData, **kwargs): + if type(offset.const) is not int: + return LiveData.new_null(data.size) + name = self.project.arch.register_size_names[(offset.const, data.size)] # unsafe + slot_info = self.project.arch.get_base_register(offset.const, data.size) + if slot_info is None: + l.error("???????") + return LiveData.new_null(data.size) + slot_name = self.project.arch.register_size_names[slot_info] + reg_atom = RegisterAtom(self.codeloc, data.size, name, slot_name) + data.commit(reg_atom, self.graph) + self.blockinfo.outputs[slot_name] = reg_atom + + def _perform_vex_stmt_WrTmp(self, tmp, data: LiveData): + tmp_atom = TmpAtom(self.codeloc, get_type_size_bytes(self.irsb.tyenv.lookup(tmp)), tmp) + self.tmps[tmp] = tmp_atom + data.commit(tmp_atom, self.graph) + + def _perform_vex_stmt_Store(self, addr: LiveData, data: LiveData, endness, **kwargs): + mem_atom = MemoryAtom(self.codeloc, data.size, endness) + addr.appended(DerefOp(data.size), data.size).commit(mem_atom, self.graph) + data.commit(mem_atom, self.graph) + + def _perform_vex_stmt_Dirty_call(self, func_name, ty, args, func=None): + if ty is None: + return None + return LiveData.new_null(get_type_size_bytes(ty)) diff --git a/typetapper/knowledge.py b/typetapper/knowledge.py new file mode 100644 index 0000000..5d4ed45 --- /dev/null +++ b/typetapper/knowledge.py @@ -0,0 +1,12 @@ +from typing import Dict +from collections import defaultdict +import angr +import networkx + +from .data import BlockInfo + +class TypeTapperManager(angr.knowledge_plugins.plugin.KnowledgeBasePlugin): + def __init__(self, kb: angr.KnowledgeBase): + self.kb = kb + self.graph = networkx.DiGraph() + self.block_info: Dict[int, BlockInfo] = defaultdict(BlockInfo)