From bbb936b9ea4824845c370f80a2293c729e613d3e Mon Sep 17 00:00:00 2001 From: theli Date: Mon, 14 Nov 2005 10:25:43 +0000 Subject: [PATCH] *) Bugfix for not human readable content of PDFs while viewing the URL Content via GUI - This Bug also affects the snippet generation on non html/text documents See: http://www.yacy-forum.de/viewtopic.php?t=1472 git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1075 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- libx/jmimemagic-0.0.4a.jar | Bin 55354 -> 55394 bytes .../de/anomic/plasma/plasmaSnippetCache.java | 25 +++++++++++++----- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/libx/jmimemagic-0.0.4a.jar b/libx/jmimemagic-0.0.4a.jar index 7e8b9d00addd156d92b695b79fd9162330d75e12..a95d8c566a21ae780bb0a68045bbd604afc95fbe 100644 GIT binary patch delta 1939 zcmV;E2WwOCb+lwTq z?JP^%A#Vo9_EHbAYt&A=q3!@u;w;v(q>@saY+rsn`7x^{OSWwJG$BBcSP6AM@pL4Q zf3&;*l+Pv4is3T1T!~>($Kkfx9c1ox`LVqDI|)T>cj+hKz)raIS5H7jza zniVne$NtxuYF6k>b?LB0%v7^NXR28tGu5nsnQB&Mf2zw&_1*!>+;o|n#+#dF1;|aa zqU5GoA#&5K%uN>|H@zSGyRIM335Un`NO(&|JlsD;evKMxqyy?cTR(lwM{P%kEpAeF z;LQ5FUT3pbPy)-jncoqa4~bIhS4sC39x%u<;Uuv9)#m4)*MVfx$j1JhIvryDY1EOi z=jZ^Ue~xwDf*_}UMqgWDWJ8eCA*66Jcs2l6Ih~>!c|7$N3@`Ixh&mj63xZcUy$HAM zZ0vu1%xAV;B;VY(ul<*y!ld-0B_+aZF^ejMVUdBpKI^)MYbsvsxTxZvVR+YceZTc2 z-^H^;s!F^#q^v|1rq{du%>%hjbK66zi$A>Sf1am@J{@WZ$9I*rJH><=2Zw0ibV#r9 zLfc|;;!!=IyZY(%?kHYPB>mduD$CC=FS1~vX$Pezl?sUoT$UfBjXS*<9lG8OI(JLa zC@62oum#$LC(XBw!5_UAfzs2c{2n2SE)f2ROz-{;N~Bs*o;=Y4ltP`If{-MLP-?fT zf1Xz=p{V5f;=@_h>EGf0)up;5NJ%2C#5b*a6So^$oSL`^2JOLJ3paujJH5MHIWAUkphhQ6LYCF zQA0$Ered*Q81NaDPgn_jiz^z)Kw$-7wrh;B8ZY%X>Wl)LO57lZYX=htH-o3!Sf9&e zGUp}3{(`C$4;n#oQ4(Z*i@<@oKEgh0@m$O1k61C_%tsire_*wr ze+i~_Pza||9=B~GgY~C(BkPmn=2)o;avJWRu%CQks57)c-1mhuQm99)SJle`j9RJf zj!;(v3w&S4Tt+1(g1!Z>G`hUm)NGUgB^&`CV6wgI`aC>c7Tr@{SdKDPxv(B>o^nBe z)A1YBHUb-Njnjd&{oFArcI#mi^HINBI^!BS8Bu^la*&oh4s29;FYh0u;i9zPfU(EY5wb0E<~Els)NHl98mIC5!@) zV9Qdk+V1m-=Ss-%&O8fsJ|FK{?f7OR>Cz01Dhiq{w~eeF$k~m5c9EFe=FU<`ydp@J5RktFa@kEb@dCZ8wOCES7lWk z6k)*SL^#yO+qW+Xj^IVOe{U1pr6y%IU%$iYh>f46N)+D5KTlhsjY6rp*NIcJb5M-i8B|{Djb@k*JWJG=bl|Jd_V5Elz zngeHtZa%ul+qI^q?o<%dolxPROj7(8#d4Kj2oT99uFs3ZQ*ZX6fAQA6SlHl>av>4u zFx$2m)Njct?AJdFjnNP$X%3oL7az|j0mTGD+3Anfixg|}H=Tr%C&}f$NYYCuK2|={ z2O6ocFufQpvGAL|PU6-erIfiEs&kJTWBrtBX_PJ=D;}58S{Gid(uJ-hJ4=S|PlmNX z$zhENXIKNnQnDD4e;`I2MoDLe6y$`GlBcUy3P8#(_hr&ta`40UVf(k z>WB3YZ1X#5ThX0|YIrf4_s*j8x%4DQSzm6T)wT@H%C;afuHwRrF(T5s%2I%AJd>Ns zlL-HfaX-)2HzVa}1Rt{6(FpZ^qurhgVQ1icC%QU5{`v8mDnp3tPam(9?$z<@PoHX$ zT#HXuJFoiX(?9(Sv+JVaGY-H_ZZqG1GaA|+003sYliJot0u!T?BiB0t_LE=NIsr$M Zn%6%8YLnX6IstE!71$pJFs+2T*!P(yHwT%uy5d+~|DA>+ak{jZ5hQExHXl57 z8EBV-C-%86mgxo-i8Mj>SK42H)7b~Hn($%HO7$NHrFvINH7SEqO%6)+L8&fOsU~Gs zs!17zKk~m;s!6Gp>O+Gqqf$*utyGgzD%GTfN;Nqsf7J)2dg}lOZTg^1!_}rq3AAZa z7Hyi8LYpQBZMqEF^mgp;1S;MWj??!D(@TadzCVS2vn@5U0d=07pWf$G+jh6b987ks ztiSEHSH}uUVL5Y(OObh(N>aZ_d#`YZPL^;#h2<|-zyG2Vk_noR{RK@r`0{MjR^PLC z0JgNwe`^pFG|%X1D`-9h1tlRhnZfe`xF{%#sx>|J8VoOrVX!3!UxVOfK^@_`m5=?e z_Bm(US^CR;+q!=lN0=1K9#SHHmT;(oQy2C1^Xb$zK2!00!$TGS4B~IiHuqaj@?A2E zHLAq(U51s&!}MCGw|XJBYHxdhb@975Jq-2If2WNp_f;U>%_+v1?i`|Z)kfX=b8U;m zP>A({?&7Cco1=sUk@TwPi!49CILm_t=ddV^5nYuK6nFB_?z3L^+c)!QQKq~N{gnwy zc+_}P@Bi6tB2#*n@Vk6FW3j2=YAz7_kQuVBvs< zu*M99Vn3(mGb|pU5-68R9EnIm1t4BPe@9S_7y2*k4dzW5kI=d&kv>4=o=5TZVk3^HLK45!gL8K4~ zKy6QQADYFHWI`e=sLSt|qumPkfDz1XXHuGrVa68{EEz}$tQ4lM*&_nAfBo2<&eP`O zD>PTxqb_qIj{#l&&dt|82(IzQ=R>ppS_Dz}o=XOIpL&kqaC5RW&>dJ03?|^ZJXFO_ z5&BGwxsN@#bNd6q9(07;7V-k@VB)dmH=Op^_^DJP;lBQvYz5W}nVWJ8QWZS=O$DGY z5sOT;2!VAudYuD_zX*FTAk(GU)5^czqQAI~-c`PjI!;~!7YGMvd@l?bFrf0FZUkz|jA8@QVgnu`-7@6EF%lGg3u(6dae-Mr(Wc+>NutCPcvnkAb=KzQ}fJDAzd{2 zM;_G>*MM}ZYcc3?e{F3F^MCW^FlDYsLe~eO`E$^9F-GfSjPl1ArN>wukLxs5K(2=E zLR5Ay`%T0{)V;mBY1M%vneQ}NI+kIWKffGRDsf3tPyQUE76by-FK*E#!k%E*F-pGg zAQhm`hD(pvde{y{@YxLGl}ggvn4LwpzY$Jl(hK2_r`dCSe=Aqb>sIITUA=bG>@;d` zkAIcplhbGV=lN&)w|-du!dCZ5&$Uk;PUDAm-WwOs_R`a2az0-{t8E!Lm32WxQpLp& z6JjXqD$4+}G9@(@N0#=R(ELJAuaxZw%Bb3Qgj%oOYE9xm=g|0$baj0A^x+aq%K7ET z50_HU>e%JS506zV*W)LPlULpO^iTf+vlFA?GY)lfjWUrs$0V{I003pWlLFUA0_&oa lOV>LA&Xb4NIsqS(!q-0mLX!g6Isr$MJ=h-xyU_pu005HLpF#it diff --git a/source/de/anomic/plasma/plasmaSnippetCache.java b/source/de/anomic/plasma/plasmaSnippetCache.java index cb1be2de0..c7bc9ef3f 100644 --- a/source/de/anomic/plasma/plasmaSnippetCache.java +++ b/source/de/anomic/plasma/plasmaSnippetCache.java @@ -321,21 +321,34 @@ public class plasmaSnippetCache { if (resource == null) return null; httpHeader header = null; try { - header = cacheManager.getCachedResponse(plasmaURL.urlHash(url)); + header = this.cacheManager.getCachedResponse(plasmaURL.urlHash(url)); } catch (IOException e) {} if (header == null) { - String filename = cacheManager.getCachePath(url).getName(); + String filename = this.cacheManager.getCachePath(url).getName(); int p = filename.lastIndexOf('.'); - if ((p < 0) || - ((p >= 0) && (plasmaParser.supportedFileExtContains(filename.substring(p + 1))))) { - return parser.parseSource(url, "text/html", resource); + if ( + (p < 0) || + ((p >= 0) && (plasmaParser.supportedFileExtContains(filename.substring(p + 1)))) + ) { + String supposedMime = "text/html"; + + // if the mimeType Parser is installed we can set the mimeType to null to force + // a mimetype detection + if (plasmaParser.supportedMimeTypesContains("application/octet-stream")) { + supposedMime = null; + } else if (p != -1){ + // otherwise we try to determine the mimeType per file Extension + supposedMime = plasmaParser.getMimeTypeByFileExt(filename.substring(p + 1)); + } + + return this.parser.parseSource(url, supposedMime, resource); } else { return null; } } else { if (plasmaParser.supportedMimeTypesContains(header.mime())) { - return parser.parseSource(url, header.mime(), resource); + return this.parser.parseSource(url, header.mime(), resource); } else { return null; }