Hello,
I'm writing a modified version of the data18 scraper to scrape in adult movies downloaded from various websites (currently the scraper only scapes dvd releases). Overall, the scraper is almost done and it is mostly working well. I've gotten the <thumb> poster tags also working just fine. Now what I want to do is to use the same image files I use as my posters to give the user the choice to use them as fanart. It appears that everything with the fanart url is being generated correctly, but I'm getting several errors like the ones below in the debug log when it tries to actually load up the img in "Choose fanart" under "Movie Information" (the fanart just shows up blank in xbmc). Choose poster works just fine and it uses the same urls and spoof value as these images!
I'm using XBMC 12.2 and here is the code of my scraper below:
I'm writing a modified version of the data18 scraper to scrape in adult movies downloaded from various websites (currently the scraper only scapes dvd releases). Overall, the scraper is almost done and it is mostly working well. I've gotten the <thumb> poster tags also working just fine. Now what I want to do is to use the same image files I use as my posters to give the user the choice to use them as fanart. It appears that everything with the fanart url is being generated correctly, but I'm getting several errors like the ones below in the debug log when it tries to actually load up the img in "Choose fanart" under "Movie Information" (the fanart just shows up blank in xbmc). Choose poster works just fine and it uses the same urls and spoof value as these images!
Code:
15:03:33 T:5536 DEBUG: CTextureCacheJob::GetImageHash - unable to stat url http://199.193.118.67/1/181/39847/03.jpg
15:03:33 T:7988 DEBUG: CTextureCacheJob::GetImageHash - unable to stat url http://199.193.118.67/1/181/39847/01.jpg
15:03:33 T:1104 DEBUG: CTextureCacheJob::GetImageHash - unable to stat url http://199.193.118.67/1/181/39847/06.jpg
15:03:33 T:5536 DEBUG: CTextureCacheJob::GetImageHash - unable to stat url http://199.193.118.67/1/181/39847/07.jpg
15:03:33 T:7988 DEBUG: CTextureCacheJob::GetImageHash - unable to stat url http://199.193.118.67/1/181/39847/08.jpg
15:03:34 T:8904 DEBUG: CTextureCacheJob::GetImageHash - unable to stat url http://199.193.118.67/1/181/39847/05.jpg
15:03:34 T:5536 DEBUG: CTextureCacheJob::GetImageHash - unable to stat url http://199.193.118.67/1/181/39847/01.jpg
15:03:34 T:7988 DEBUG: CTextureCacheJob::GetImageHash - unable to stat url http://199.193.118.67/1/181/39847/03.jpg
15:03:34 T:8904 DEBUG: CTextureCacheJob::GetImageHash - unable to stat url http://199.193.118.67/1/181/39847/02.jpg
15:03:34 T:1104 DEBUG: CTextureCacheJob::GetImageHash - unable to stat url http://199.193.118.67/1/181/39847/09.jpg
I'm using XBMC 12.2 and here is the code of my scraper below:
Code:
<?xml version="1.0" encoding="utf-8"?>
<scraper framework="1.1" date="2013-05-14" name="Data18MOD" content="movies" language="en">
<CreateSearchUrl clearbuffers="no" dest="3">
<RegExp input="$$5" output="<url>\1</url>" dest="3">
<!--<!- Add film year, stored in $$2 for default by xbmc ->-->
<RegExp input="$$2" output=" (\1)" dest="4">
<expression clear="yes">(.+)</expression>
</RegExp>
<RegExp input="$$1" output="https://www.google.com/search?q=\1+site%3Adata18.com%2Fcontent&btnG=Search&output=search" dest="5">
<expression/>
</RegExp>
<expression noclean="1"/>
</RegExp>
</CreateSearchUrl>
<GetSearchResults clearbuffers="no" dest="6">
<RegExp input="$$4" output="<?xml version="1.0" encoding="UTF-8" standalone="yes"?><results>\1</results>" dest="6">
<RegExp input="$$1" output="<entity><title>\2</title><url>\1</url></entity>" dest="4+">
<expression repeat="yes" noclean="1"><h3 class="r"><a href="/url\?q=([^&]+)&(?:[^"]*)">(.+?)</a></h3></expression>
</RegExp>
<expression noclean="1"/>
</RegExp>
</GetSearchResults>
<GetDetails clearbuffers="no" dest="7">
<RegExp input="$$5" output="<?xml version="1.0" encoding="utf-8" standalone="yes"?><details>\1</details>" dest="7">
<RegExp input="$$9" output="<title>\1</title>" dest="5+">
<!--Regular title for web releases-->
<RegExp input="$$1" output="\1" dest="9">
<expression trim="1" noclean="1"><h1[^>]*>([^<]*)</h1></expression>
</RegExp>
<!--If this is a scene from a split movie, make the title "Movie - Scene # - Actress1, Actress2..."-->
<RegExp input="$$1" output="\2 - \1" dest="9">
<expression trim="1,2" noclean="1,2"><title>(Scene [0-9]+?) from (.+) - (.+) - data18.com</title></expression>
</RegExp>
<!--If this is a scene from a split movie, make the title "Movie - Scene #"-->
<RegExp input="$$1" output="\2 - \3 - \1" dest="9">
<expression trim="1,2,3" noclean="1,2,3"><title>(.+?) in (.+?)(?: - Scene [0-9]*)? - (?:.+?)</title>(?:.+?)(Scene [0-9]+)</expression>
</RegExp>
<!--Title from web episodes where there is no episode name but instead just the site and a month and year-->
<RegExp input="$$1" output="\1 - \2 - \3" dest="9">
<expression trim="1,2,3" noclean="1,2,3"><title>(.+?) in (.+?) \( (.+?) \).+?</title></expression>
</RegExp>
<expression trim="1"/>
</RegExp>
<RegExp input="$$10" output="<plot>\1</plot>" dest="5+">
<!--Get the movie plot for scenes. This will be replaced by scene plot if it exists in the next expression.-->
<RegExp input="$$1" output="\1" dest="10">
<expression>title="Go to movie profile".+<p class="line1">(.+)</p></expression>
</RegExp>
<!--Plot for web clips and some scenes with individual plots filled in-->
<RegExp input="$$1" output="\1" dest="10">
<expression><b>Story:</b> ([^<]*)</expression>
</RegExp>
<expression trim="1" noclean="1"/>
</RegExp>
<!--Actors with thumbs-->
<RegExp input="$$1" output="<actor><name>\3</name><thumb>\1/120/\2</thumb></actor>" dest="5+">
<expression repeat="yes" trim="3" noclean="1,2"><img src="(http://www.data18.com/img/stars)/60/([^"]*)"[^<]*alt="([^"]*)</expression>
</RegExp>
<!--Actors with no thumbs-->
<RegExp input="$$1" output="<actor><name>\1</name></actor>" dest="5+">
<expression repeat="yes" trim="1"><img src="http://www.data18.com/img/no_prev_60.gif"[^<]*alt="([^"]*)</expression>
</RegExp>
<!--Actors with unambiquous name and no info-->
<RegExp input="$$1" output="<actor><name>\1</name></actor>" dest="5+">
<expression repeat="yes" trim="1"><a href="http://www.data18.com/dev/[^"]*">([^<&-]*)</a></expression>
</RegExp>
<RegExp input="$$1" output="<runtime>\1</runtime>" dest="5+">
<expression>Length:</b>\s*(\d+)</expression>
</RegExp>
<!--This isn't usually listed on web clips-->
<RegExp input="$$1" output="<director>\1</director>" dest="5+">
<expression trim="1">Director:</b>[^>]*>([^<]*)</expression>
</RegExp>
<!--Studio is the network the clip is from-->
<RegExp input="$$1" output="<studio>\1</studio>" dest="5+">
<expression trim="1"><a href="http://www.data18.com/sites/[^/]+/">([^<]+)</a></expression>
</RegExp>
<RegExp input="$$10" output="<set>\1</set>" dest="5+">
<!--If this is a split scene from a full movie, use the full movie as the set name-->
<RegExp input="$$1" output="\1" dest="10">
<expression><title>Scene [0-9]+? from (.+) - (.+) - data18.com</title></expression>
</RegExp>
<!--Use related movie as set name since this is should be the movie the scene came from-->
<RegExp input="$$1" output="\1" dest="10">
<expression><title>(?:.+?) in (.+?)(?: at .+?)?(?: - Scene [0-9]*)? - (?:.+?)</title></expression>
</RegExp>
<!--Set name is the website name-->
<RegExp input="$$1" output="\1" dest="10">
<expression trim="1"><b>Where to Watch:</b> <span class="gen11">([^<]+)</span></expression>
</RegExp>
<expression trim="1"/>
</RegExp>
<RegExp input="$$11" output="<year>\1</year>" dest="5+">
<!--Year for web dls-->
<RegExp input="$$1" output="\1" dest="11">
<expression noclean="1">href="http://www.data18.com/content/date_(\d{4})[\d]+.html"</expression>
</RegExp>
<!--year for scenes-->
<RegExp input="$$1" output="\1" dest="11">
<expression>Release date: <b>.+(\d{4})</b></expression>
</RegExp>
<expression/>
</RegExp>
<RegExp input="$$8" output="<genre>\1</genre>" dest="5+">
<RegExp input="$$1" output="\1" dest="8">
<expression noclean="1"><b>Categories:</b>(.*?)</p></expression>
</RegExp>
<expression repeat="yes" trim="1"><a href=[^>]*>([^<]*)</expression>
</RegExp>
<!--Poster thumbs-->
<RegExp input="$$12" output="\1" dest="5+">
<!--Gets photo linked by trailer. This is fallback in case there is no image gallery. This will get replaced by the following expressions if possible, because the image galleries are higher resolution.-->
<RegExp input="$$1" output="<thumb spoof="http://www.data18.com">\1</thumb><fanart><thumb spoof="http://www.data18.com">\1</thumb></fanart>" dest="12">
<expression noclean="1">img src="([^"]+)" width="[0-9]+" height="[0-9]+" class="noborder" alt="Play this Video" title="Play this Video"</expression>
</RegExp>
<!--Photo Gallery Images, if they exist, will replace the trailer image. Only get the first link and assume there are 16 total images-->
<RegExp input="$$1" output="<url spoof="http://www.data18.com" function="GetPhotoFromViewer">\1</url>" dest="12">
<expression noclean="1"><a href="(http://www.data18.com/viewer/[^/]+/01)?" rel="nofollow"></expression>
</RegExp>
<!--On some pages the first photo gallery link goes to image 2 instead of image 1-->
<RegExp input="$$1" output="<url spoof="http://www.data18.com" function="GetPhotoFromViewer">\1</url>" dest="12">
<expression noclean="1"><a href="(http://www.data18.com/viewer/[^/]+/02)?" rel="nofollow"></expression>
</RegExp>
<expression noclean="1"/>
</RegExp>
<!--Store the title of the page on data18 into original title-->
<RegExp input="$$1" output="<originaltitle>\1</originaltitle>" dest="5+">
<expression><title>(.+) - data18.com</title></expression>
</RegExp>
<RegExp input="$$1" output="<releasedate>\1-\2-\3</releasedate>" dest="5+">
<expression noclean="1">href="http://www.data18.com/content/date_(\d{4})(\d{2})(\d{2})\.html"</expression>
</RegExp>
<RegExp input="$$1" output="<mpaa>NC-17</mpaa>" dest="5+">
<expression/>
</RegExp>
<expression noclean="1"/>
</RegExp>
</GetDetails>
<GetPhotoFromViewer dest="3">
<RegExp input="$$5" output="<details>\1</details>" dest="3">
<RegExp input="$$1" output="<thumb spoof="http://www.data18.com/viewer/">http://\1/\2/\3/\4/01.jpg</thumb>
\n<thumb spoof="http://www.data18.com/viewer/">http://\1/\2/\3/\4/02.jpg</thumb>
\n<thumb spoof="http://www.data18.com/viewer/">http://\1/\2/\3/\4/03.jpg</thumb>
\n<thumb spoof="http://www.data18.com/viewer/">http://\1/\2/\3/\4/04.jpg</thumb>
\n<thumb spoof="http://www.data18.com/viewer/">http://\1/\2/\3/\4/05.jpg</thumb>
\n<thumb spoof="http://www.data18.com/viewer/">http://\1/\2/\3/\4/06.jpg</thumb>
\n<thumb spoof="http://www.data18.com/viewer/">http://\1/\2/\3/\4/07.jpg</thumb>
\n<thumb spoof="http://www.data18.com/viewer/">http://\1/\2/\3/\4/08.jpg</thumb>
\n<thumb spoof="http://www.data18.com/viewer/">http://\1/\2/\3/\4/09.jpg</thumb>
\n<thumb spoof="http://www.data18.com/viewer/">http://\1/\2/\3/\4/10.jpg</thumb>
\n<thumb spoof="http://www.data18.com/viewer/">http://\1/\2/\3/\4/11.jpg</thumb>
\n<thumb spoof="http://www.data18.com/viewer/">http://\1/\2/\3/\4/12.jpg</thumb>
\n<thumb spoof="http://www.data18.com/viewer/">http://\1/\2/\3/\4/13.jpg</thumb>
\n<thumb spoof="http://www.data18.com/viewer/">http://\1/\2/\3/\4/14.jpg</thumb>
\n<thumb spoof="http://www.data18.com/viewer/">http://\1/\2/\3/\4/15.jpg</thumb>
\n<thumb spoof="http://www.data18.com/viewer/">http://\1/\2/\3/\4/16.jpg</thumb>
\n<fanart url="http://\1/\2/\3/\4/">
\n<thumb spoof="http://www.data18.com/viewer/">01.jpg</thumb>
\n<thumb spoof="http://www.data18.com/viewer/">02.jpg</thumb>
\n<thumb spoof="http://www.data18.com/viewer/">03.jpg</thumb>
\n<thumb spoof="http://www.data18.com/viewer/">04.jpg</thumb>
\n<thumb spoof="http://www.data18.com/viewer/">05.jpg</thumb>
\n<thumb spoof="http://www.data18.com/viewer/">06.jpg</thumb>
\n<thumb spoof="http://www.data18.com/viewer/">07.jpg</thumb>
\n<thumb spoof="http://www.data18.com/viewer/">08.jpg</thumb>
\n<thumb spoof="http://www.data18.com/viewer/">09.jpg</thumb>
\n<thumb spoof="http://www.data18.com/viewer/">10.jpg</thumb>
\n<thumb spoof="http://www.data18.com/viewer/">11.jpg</thumb>
\n<thumb spoof="http://www.data18.com/viewer/">12.jpg</thumb>
\n<thumb spoof="http://www.data18.com/viewer/">13.jpg</thumb>
\n<thumb spoof="http://www.data18.com/viewer/">14.jpg</thumb>
\n<thumb spoof="http://www.data18.com/viewer/">15.jpg</thumb>
\n<thumb spoof="http://www.data18.com/viewer/">16.jpg</thumb>
\n</fanart>" dest="5">
<expression noclean="1,2,3,4"><img src="http://([^/]+)/([^/]+)/([^/]+)/([^/]+)/([0-9]+).jpg" class="noborder" alt="image" /></expression>
</RegExp>
<expression noclean="1"/>
</RegExp>
</GetPhotoFromViewer>
</scraper>