kenzu Posted January 20, 2013 Share Posted January 20, 2013 (edited) <p>Ok, so i found this script and i have some questions expandcollapse popup; ---------------------------------------------------------------------------- ; ; AutoIt Version: 3.1.1.87 ; Author: AcidicChip <acidicchip@acidicchip.com> ; ; Script Name: Web Media Spider ; Script Version: 0.21 ; ; Script Function: ; Spider the web and gather media file URLs ; ; ---------------------------------------------------------------------------- Opt("GUIOnEventMode", 1) Opt("TrayIconDebug", 1) #include <Array.au3> #include <GUIConstants.au3> Dim $collected[1] Dim $urls[1] Dim $urlon = 0 Dim $urlnum = 0 Dim $imagenum = 0 Dim $audionum = 0 Dim $videonum = 0 #region "GUI" GUICreate("Media Spider", 600, 100) $lblAction = GUICtrlCreateLabel("Action:", 0, 3, 35, 20) $txtAction = GUICtrlCreateInput("", 40, 0, 560, 20) GUICtrlSetState($txtAction, $GUI_DISABLE) $lblURL = GUICtrlCreateLabel("URL:", 0, 23, 35, 20) $txtURL = GUICtrlCreateInput("", 40, 20, 560, 20) GUICtrlSetState($txtURL, $GUI_DISABLE) $prgPercent = GUICtrlCreateProgress(0, 40, 560, 20) $txtPercent = GUICtrlCreateInput("0%", 560, 40, 40, 20) GUICtrlSetState($txtPercent, $GUI_DISABLE) $lblURLs = GUICtrlCreateLabel("URLs:", 0, 63, 35, 20) $txtURLs = GUICtrlCreateInput("0", 40, 60, 75, 20) GUICtrlSetState($txtURLs, $GUI_DISABLE) $lblAudio = GUICtrlCreateLabel("Audio:", 125, 63, 35, 20) $txtAudio = GUICtrlCreateInput("0", 160, 60, 75, 20) GUICtrlSetState($txtAudio, $GUI_DISABLE) $lblImages = GUICtrlCreateLabel("Images:", 245, 63, 36, 20) $txtImages = GUICtrlCreateInput("0", 285, 60, 75, 20) GUICtrlSetState($txtImages, $GUI_DISABLE) $lblVideos = GUICtrlCreateLabel("Videos:", 370, 63, 35, 20) $txtVideos = GUICtrlCreateInput("0", 410, 60, 75, 20) GUICtrlSetState($txtVideos, $GUI_DISABLE) $lblHistory = GUICtrlCreateLabel("History:", 490, 63, 35, 20) $txtHistory = GUICtrlCreateInput("0", 530, 60, 75, 20) GUICtrlSetState($txtHistory, $GUI_DISABLE) $lblStartURL = 
GUICtrlCreateLabel("Start URL:", 0, 83, 50, 20) $txtStartURL = GUICtrlCreateInput("http://www.myspace.com/acidicchip", 55, 80, 490, 20) $btnStartStop = GUICtrlCreateButton("Start", 550, 80, 50, 20) GUISetState(@SW_SHOW) GUISetOnEvent($GUI_EVENT_CLOSE, "GUIClose") GUICtrlSetOnEvent($btnStartStop, "GUIStartStop") #endregion "GUI" Func GUIClose() Exit EndFunc ;==>GUIClose Func GUIStartStop() If GUICtrlRead($btnStartStop) == "Start" Then GUICtrlSetData($btnStartStop, "Stop") GUICtrlSetState($txtStartURL, $GUI_DISABLE) FileDelete("spider.urls.txt") GetURLs(GUICtrlRead($txtStartURL)) Do ;$url = $urls[1] $urlon = $urlon + 1 $url = FileReadLine("spider.urls.txt", $urlon) ;_ArrayDelete($urls, 1) $urlnum = $urlnum - 1 GetURLs($url) Until $urlnum <= 0 Or GUICtrlRead($btnStartStop) == "Start" ;Until UBound($urls) <= 1 Or GUICtrlRead($btnStartStop) == "Start" Else GUICtrlSetData($btnStartStop, "Start") GUICtrlSetState($txtStartURL, $GUI_ENABLE) EndIf EndFunc ;==>GUIStartStop While 1 Sleep(250) Wend Func Status($action, $url, $percent) GUICtrlSetData($txtAction, $action) If $url <> "" Then GUICtrlSetData($txtURL, $url) GUICtrlSetData($prgPercent, $percent) GUICtrlSetData($txtPercent, $percent & "%") GUICtrlSetData($txtURLs, $urlnum) ;GUICtrlSetData($txtURLs, UBound($urls)) GUICtrlSetData($txtAudio, $audionum) GUICtrlSetData($txtImages, $imagenum) GUICtrlSetData($txtVideos, $videonum) GUICtrlSetData($txtHistory, UBound($collected)) EndFunc ;==>Status Func _ArrayParse($str, $before, $after) Return StringRegExp($str, "(?i)" & $before & "(.*?)" & $after, 3) EndFunc ;==>_ArrayParse Func AddURL($url) If Not WasCollected($url) Then _ArrayAdd($collected, $url) ;_ArrayAdd($urls, $url) FileWriteLine("spider.urls.txt", $url) $urlnum = $urlnum + 1 EndIf EndFunc ;==>AddURL Func WasCollected($url) $return = False For $i = 1 To Ubound($collected) - 1 Step 1 If $collected[$i] == $url Then $return = True ExitLoop EndIf Next If Not $return And UBound($collected) >= 1024 Then 
_ArrayDelete($collected, 1) Return $return EndFunc ;==>WasCollected Func GetURI($url) $uri = StringMid($url, 1, StringInStr($url, "://")) & "//" $turl = StringMid($url, StringLen($uri) + 1) If StringInStr($turl, "?") Then $temp = StringSplit($turl, "?") $turl = $temp[1] $temp = StringSplit($turl, "/") $uri = $uri & $temp[1] & "/" For $i = 2 To UBound($temp) - 1 Step 1 If StringInStr($temp[$i], ".") Or Not StringLen($temp[$i]) Then ExitLoop $uri = $uri & $temp[$i] & "/" Next If Not InetGetSize(StringLeft($uri, StringLen($uri) - 1)) Then $uri = StringMid($url, 1, StringInStr($url, "://")) & "//" $temp = StringSplit($turl, "?") $turl = $temp[1] $temp = StringSplit($turl, "/") $uri = $uri & $temp[1] & "/" For $i = 2 To UBound($temp) - 2 Step 1 If StringInStr($temp[$i], ".") Or Not StringLen($temp[$i]) Then ExitLoop $uri = $uri & $temp[$i] & "/" Next EndIf Else $temp = StringSplit($turl, "/") $uri = $uri & $temp[1] & "/" For $i = 2 To UBound($temp) - 1 Step 1 If StringInStr($temp[$i], ".") Or Not StringLen($temp[$i]) Then ExitLoop $uri = $uri & $temp[$i] & "/" Next EndIf Return $uri EndFunc ;==>GetURI Func GetURLs($url) $uri = GetURI($url) $file = "spider.html.txt" Status("Downloading", $url, 0) $filesize = InetGetSize($url) $lastsize = 0 $strikes = 0 InetGet($url, $file, 1, 1) While @InetGetActive If $lastsize == @InetGetBytesRead Then $strikes = $strikes + 1 If $strikes >= 30 Then ExitLoop $lastsize = @InetGetBytesRead Status("Downloading", $url, Round(($lastsize / $filesize) * 100)) Sleep(250) Wend $html = FileRead($file, FileGetSize($file)) FileDelete($file) Status("Parsing URLs", $url, 0) $tags = _ArrayParse($html, "<a", ">") For $i = 0 To UBound($tags) - 1 Step 1 Status("Checking <A> Tags for URLs", $url, Round(($i / (UBound($tags) - 1)) * 100)) CheckURL($uri, $tags[$i], $url) Next $tags = _ArrayParse($html, "<img", ">") For $i = 0 To UBound($tags) - 1 Step 1 Status("Checking <IMG> Tags for URLs", $url, Round(($i / (UBound($tags) - 1)) * 100)) CheckURL($uri, 
$tags[$i], $url) Next $tags = _ArrayParse($html, "<embed", ">") For $i = 0 To UBound($tags) - 1 Step 1 Status("Checking <EMBED> Tags for URLs", $url, Round(($i / (UBound($tags) - 1)) * 100)) CheckURL($uri, $tags[$i], $url) Next EndFunc ;==>GetURLs Func CheckURL($uri, $str, $ref) If StringInStr($str, "href=") Then $turl = GetAttr($str, "href=") If Not StringInStr(StringLeft($turl, 10), "://") Then If StringLeft($turl, 1) == "/" Then $turl = $uri & StringMid($turl, 2) Else $turl = $uri & $turl EndIf EndIf CheckType($turl, $ref) EndIf If StringInStr($str, "src=") Then $turl = GetAttr($str, "src=") If Not StringInStr(StringLeft($turl, 10), "://") Then If StringLeft($turl, 1) == "/" Then $turl = $uri & StringMid($turl, 2) Else $turl = $uri & $turl EndIf EndIf CheckType($turl, $ref) EndIf EndFunc ;==>CheckURL Func GetAttr($str, $attr) If StringInStr($str, $attr & '"') Then $temp = _ArrayParse($str, $attr & '"', '"') If UBound($temp) == 1 Then Return $temp[0] ElseIf StringInStr($str, $attr & "'") Then $temp = _ArrayParse($str, $attr & "'", "'") If UBound($temp) == 1 Then Return $temp[0] ElseIf StringInStr($str, $attr) Then $temp = StringMid($str, StringInStr($str, $attr) + StringLen($attr)) If StringInStr($temp, " ") Then $temp = StringMid($temp, 1, StringInStr($temp, " ") - 1) EndIf Return $temp EndIf EndFunc ;==>GetAttr Func CheckType($url, $ref) If StringRight($url, 4) == ".jpg" Or _ StringRight($url, 4) == ".gif" Or _ StringRight($url, 4) == ".png" Or _ StringRight($url, 4) == "bmp" Then FileWriteLine("spider.images.log", $url & @TAB & $ref) $imagenum = $imagenum + 1 ElseIf StringRight($url, 4) == ".mp3" Or _ StringRight($url, 4) == ".rbs" Then FileWriteLine("spider.audio.log", $url & @TAB & $ref) $audionum = $audionum + 1 AddURL(GetURI($url)) ElseIf StringRight($url, 4) == ".avi" Or _ StringRight($url, 4) == ".wmv" Or _ StringRight($url, 4) == ".mpg" Or _ StringRight($url, 5) == ".mpeg" Then FileWriteLine("spider.video.log", $url & @TAB & $ref) $videonum = $videonum 
+ 1 AddURL(GetURI($url)) ElseIf StringRight($url, 4) == ".exe" Or _ StringRight($url, 4) == ".zip" Or _ StringRight($url, 4) == ".rar" Or _ StringRight($url, 4) == ".tar" Then ;Do Nothing Else AddURL($url) EndIf EndFunc ;==>CheckType As you see it saves the .mp3 url(URL1) after that it tabs and saves the URL of the page (URL2). Is it possible to save the Page Title istead of URL2 ? Edited January 20, 2013 by kenzu Link to comment Share on other sites More sharing options...
JohnOne Posted January 20, 2013 Share Posted January 20, 2013 "@InetGetBytesRead" Have you looked that up in the help file? AutoIt Absolute Beginners  Require a serial  Pause Script  Video Tutorials by Morthawt  ipify Monkey's are, like, natures humans. Link to comment Share on other sites More sharing options...
kenzu Posted January 20, 2013 Author Share Posted January 20, 2013 (edited) Yes, I looked, but I can't figure it out. I think the answer is in here: $tags = _ArrayParse($html, "<img", ">") For $i = 0 To UBound($tags) - 1 Step 1 Status("Checking <IMG> Tags for URLs", $url, Round(($i / (UBound($tags) - 1)) * 100)) CheckURL($uri, $tags[$i], $url) by changing _ArrayParse($html, "<img" to _ArrayParse($html, "<title"), but I don't know how to save the titles in the txt files. Edited January 20, 2013 by kenzu Link to comment Share on other sites More sharing options...
JohnOne Posted January 20, 2013 Share Posted January 20, 2013 Well, you have a very old version of AutoIt then, because @InetGetBytesRead has not been in it for over 2 years. AutoIt Absolute Beginners  Require a serial  Pause Script  Video Tutorials by Morthawt  ipify Monkey's are, like, natures humans. Link to comment Share on other sites More sharing options...
BrewManNH Posted January 20, 2013 Share Posted January 20, 2013 JohnOne was trying to tell you that the script won't run on the latest version of AutoIt. What version are YOU running? If I posted any code, assume that code was written using the latest release version unless stated otherwise. Also, if it doesn't work on XP I can't help with that because I don't have access to XP, and I'm not going to.Give a programmer the correct code and he can do his work for a day. Teach a programmer to debug and he can do his work for a lifetime - by Chirag GudeHow to ask questions the smart way! I hereby grant any person the right to use any code I post, that I am the original author of, on the autoitscript.com forums, unless I've specifically stated otherwise in the code or the thread post. If you do use my code all I ask, as a courtesy, is to make note of where you got it from. Back up and restore Windows user files _Array.au3 - Modified array functions that include support for 2D arrays.  -  ColorChooser - An add-on for SciTE that pops up a color dialog so you can select and paste a color code into a script.  -  Customizable Splashscreen GUI w/Progress Bar - Create a custom "splash screen" GUI with a progress bar and custom label.  -  _FileGetProperty - Retrieve the properties of a file  -  SciTE Toolbar - A toolbar demo for use with the SciTE editor  -  GUIRegisterMsg demo - Demo script to show how to use the Windows messages to interact with controls and your GUI.  -   Latin Square password generator Link to comment Share on other sites More sharing options...
Mat Posted January 20, 2013 Share Posted January 20, 2013 JohnOne was trying to tell you that the script won't run on the latest version of AutoIt. What version are YOU running? The header of the script says: 3.1.1.87 AutoIt Project Listing Link to comment Share on other sites More sharing options...
BrewManNH Posted January 21, 2013 Share Posted January 21, 2013 The header of the script says: 3.1.1.87I understand that. My point was that it won't run on the current version of AutoIt, I don't think the header has any bearing on what I said. That is why I asked what version the OP was running. If I posted any code, assume that code was written using the latest release version unless stated otherwise. Also, if it doesn't work on XP I can't help with that because I don't have access to XP, and I'm not going to.Give a programmer the correct code and he can do his work for a day. Teach a programmer to debug and he can do his work for a lifetime - by Chirag GudeHow to ask questions the smart way! I hereby grant any person the right to use any code I post, that I am the original author of, on the autoitscript.com forums, unless I've specifically stated otherwise in the code or the thread post. If you do use my code all I ask, as a courtesy, is to make note of where you got it from. Back up and restore Windows user files _Array.au3 - Modified array functions that include support for 2D arrays.  -  ColorChooser - An add-on for SciTE that pops up a color dialog so you can select and paste a color code into a script.  -  Customizable Splashscreen GUI w/Progress Bar - Create a custom "splash screen" GUI with a progress bar and custom label.  -  _FileGetProperty - Retrieve the properties of a file  -  SciTE Toolbar - A toolbar demo for use with the SciTE editor  -  GUIRegisterMsg demo - Demo script to show how to use the Windows messages to interact with controls and your GUI.  -   Latin Square password generator Link to comment Share on other sites More sharing options...
kenzu Posted January 21, 2013 Author Share Posted January 21, 2013 (edited) im using autoit3, the script works fine, it saves the .mp3,jpg,avi files just fine, i am trying to save the page title too in the audio.txt file. I tryed to add this $html = FileRead($file, FileGetSize($file)) For $line = 1 To 500 If StringInStr(FileReadLine($file, $line), "<title>") Then $Fchartid = StringRegExpReplace(FileReadLine($file, $line), "(\D.*|\d.*|)(<title>)=(\d*)(\D.*|\d.*|)", "$3") $Online = $line EndIf and this to save it ElseIf StringRight($url, 4) == ".swf" Or _ StringRight($url, 4) == ".mp3" Then FileWriteLine("spider.audio.log", $url & @TAB & $Fchartid) but it's not working Edited January 21, 2013 by kenzu Link to comment Share on other sites More sharing options...
BrewManNH Posted January 21, 2013 Share Posted January 21, 2013 That's not a version number, that's the name of the program. If you're trying to run this on any version above 3.3.2.0 it won't run correctly at all. If I posted any code, assume that code was written using the latest release version unless stated otherwise. Also, if it doesn't work on XP I can't help with that because I don't have access to XP, and I'm not going to.Give a programmer the correct code and he can do his work for a day. Teach a programmer to debug and he can do his work for a lifetime - by Chirag GudeHow to ask questions the smart way! I hereby grant any person the right to use any code I post, that I am the original author of, on the autoitscript.com forums, unless I've specifically stated otherwise in the code or the thread post. If you do use my code all I ask, as a courtesy, is to make note of where you got it from. Back up and restore Windows user files _Array.au3 - Modified array functions that include support for 2D arrays.  -  ColorChooser - An add-on for SciTE that pops up a color dialog so you can select and paste a color code into a script.  -  Customizable Splashscreen GUI w/Progress Bar - Create a custom "splash screen" GUI with a progress bar and custom label.  -  _FileGetProperty - Retrieve the properties of a file  -  SciTE Toolbar - A toolbar demo for use with the SciTE editor  -  GUIRegisterMsg demo - Demo script to show how to use the Windows messages to interact with controls and your GUI.  -   Latin Square password generator Link to comment Share on other sites More sharing options...
JohnOne Posted January 21, 2013 Share Posted January 21, 2013 MsgBox(0,0,@AutoItVersion) AutoIt Absolute Beginners  Require a serial  Pause Script  Video Tutorials by Morthawt  ipify Monkey's are, like, natures humans. Link to comment Share on other sites More sharing options...
kenzu Posted January 21, 2013 Author Share Posted January 21, 2013 (edited) I run v3.3.8.1. As I said, the script works, but I don't understand how to save the page title instead of the URL. Edited January 21, 2013 by kenzu Link to comment Share on other sites More sharing options...
JohnOne Posted January 21, 2013 Share Posted January 21, 2013 You should post your new code, because you cannot be running the code in first post, it would error and not run. AutoIt Absolute Beginners  Require a serial  Pause Script  Video Tutorials by Morthawt  ipify Monkey's are, like, natures humans. Link to comment Share on other sites More sharing options...
kenzu Posted January 21, 2013 Author Share Posted January 21, 2013 my code is this expandcollapse popup; ---------------------------------------------------------------------------- ; ; AutoIt Version: 3.1.1.87 ; Author: AcidicChip <acidicchip@acidicchip.com> ; ; Script Name: Web Media Spider ; Script Version: 0.21 ; ; Script Function: ; Spider the web and gather media file URLs ; ; ---------------------------------------------------------------------------- Opt("GUIOnEventMode", 1) Opt("TrayIconDebug", 1) #include <Array.au3> #include <GUIConstants.au3> Dim $collected[1] Dim $urls[1] Dim $urlon = 0 Dim $urlnum = 0 Dim $imagenum = 0 Dim $audionum = 0 Dim $videonum = 0 #region "GUI" GUICreate("Media Spider", 600, 100) $lblAction = GUICtrlCreateLabel("Action:", 0, 3, 35, 20) $txtAction = GUICtrlCreateInput("", 40, 0, 560, 20) GUICtrlSetState($txtAction, $GUI_DISABLE) $lblURL = GUICtrlCreateLabel("URL:", 0, 23, 35, 20) $txtURL = GUICtrlCreateInput("", 40, 20, 560, 20) GUICtrlSetState($txtURL, $GUI_DISABLE) $prgPercent = GUICtrlCreateProgress(0, 40, 560, 20) $txtPercent = GUICtrlCreateInput("0%", 560, 40, 40, 20) GUICtrlSetState($txtPercent, $GUI_DISABLE) $lblURLs = GUICtrlCreateLabel("URLs:", 0, 63, 35, 20) $txtURLs = GUICtrlCreateInput("0", 40, 60, 75, 20) GUICtrlSetState($txtURLs, $GUI_DISABLE) $lblAudio = GUICtrlCreateLabel("Audio:", 125, 63, 35, 20) $txtAudio = GUICtrlCreateInput("0", 160, 60, 75, 20) GUICtrlSetState($txtAudio, $GUI_DISABLE) $lblImages = GUICtrlCreateLabel("Images:", 245, 63, 36, 20) $txtImages = GUICtrlCreateInput("0", 285, 60, 75, 20) GUICtrlSetState($txtImages, $GUI_DISABLE) $lblVideos = GUICtrlCreateLabel("Videos:", 370, 63, 35, 20) $txtVideos = GUICtrlCreateInput("0", 410, 60, 75, 20) GUICtrlSetState($txtVideos, $GUI_DISABLE) $lblHistory = GUICtrlCreateLabel("History:", 490, 63, 35, 20) $txtHistory = GUICtrlCreateInput("0", 530, 60, 75, 20) GUICtrlSetState($txtHistory, $GUI_DISABLE) $lblStartURL = GUICtrlCreateLabel("Start URL:", 0, 83, 50, 
20) $txtStartURL = GUICtrlCreateInput("http://www.myspace.com/acidicchip", 55, 80, 490, 20) $btnStartStop = GUICtrlCreateButton("Start", 550, 80, 50, 20) GUISetState(@SW_SHOW) GUISetOnEvent($GUI_EVENT_CLOSE, "GUIClose") GUICtrlSetOnEvent($btnStartStop, "GUIStartStop") #endregion "GUI" Func GUIClose() Exit EndFunc ;==>GUIClose Func GUIStartStop() If GUICtrlRead($btnStartStop) == "Start" Then GUICtrlSetData($btnStartStop, "Stop") GUICtrlSetState($txtStartURL, $GUI_DISABLE) FileDelete("spider.urls.txt") GetURLs(GUICtrlRead($txtStartURL)) Do ;$url = $urls[1] $urlon = $urlon + 1 $url = FileReadLine("spider.urls.txt", $urlon) ;_ArrayDelete($urls, 1) $urlnum = $urlnum - 1 GetURLs($url) Until $urlnum <= 0 Or GUICtrlRead($btnStartStop) == "Start" ;Until UBound($urls) <= 1 Or GUICtrlRead($btnStartStop) == "Start" Else GUICtrlSetData($btnStartStop, "Start") GUICtrlSetState($txtStartURL, $GUI_ENABLE) EndIf EndFunc ;==>GUIStartStop While 1 Sleep(250) Wend Func Status($action, $url, $percent) GUICtrlSetData($txtAction, $action) If $url <> "" Then GUICtrlSetData($txtURL, $url) GUICtrlSetData($prgPercent, $percent) GUICtrlSetData($txtPercent, $percent & "%") GUICtrlSetData($txtURLs, $urlnum) ;GUICtrlSetData($txtURLs, UBound($urls)) GUICtrlSetData($txtAudio, $audionum) GUICtrlSetData($txtImages, $imagenum) GUICtrlSetData($txtVideos, $videonum) GUICtrlSetData($txtHistory, UBound($collected)) EndFunc ;==>Status Func _ArrayParse($str, $before, $after) Return StringRegExp($str, "(?i)" & $before & "(.*?)" & $after, 3) EndFunc ;==>_ArrayParse Func AddURL($url) If Not WasCollected($url) Then _ArrayAdd($collected, $url) ;_ArrayAdd($urls, $url) FileWriteLine("spider.urls.txt", $url) $urlnum = $urlnum + 1 EndIf EndFunc ;==>AddURL Func WasCollected($url) $return = False For $i = 1 To Ubound($collected) - 1 Step 1 If $collected[$i] == $url Then $return = True ExitLoop EndIf Next If Not $return And UBound($collected) >= 1024 Then _ArrayDelete($collected, 1) Return $return EndFunc 
;==>WasCollected Func GetURI($url) $uri = StringMid($url, 1, StringInStr($url, "://")) & "//" $turl = StringMid($url, StringLen($uri) + 1) If StringInStr($turl, "?") Then $temp = StringSplit($turl, "?") $turl = $temp[1] $temp = StringSplit($turl, "/") $uri = $uri & $temp[1] & "/" For $i = 2 To UBound($temp) - 1 Step 1 If StringInStr($temp[$i], ".") Or Not StringLen($temp[$i]) Then ExitLoop $uri = $uri & $temp[$i] & "/" Next If Not InetGetSize(StringLeft($uri, StringLen($uri) - 1)) Then $uri = StringMid($url, 1, StringInStr($url, "://")) & "//" $temp = StringSplit($turl, "?") $turl = $temp[1] $temp = StringSplit($turl, "/") $uri = $uri & $temp[1] & "/" For $i = 2 To UBound($temp) - 2 Step 1 If StringInStr($temp[$i], ".") Or Not StringLen($temp[$i]) Then ExitLoop $uri = $uri & $temp[$i] & "/" Next EndIf Else $temp = StringSplit($turl, "/") $uri = $uri & $temp[1] & "/" For $i = 2 To UBound($temp) - 1 Step 1 If StringInStr($temp[$i], ".") Or Not StringLen($temp[$i]) Then ExitLoop $uri = $uri & $temp[$i] & "/" Next EndIf Return $uri EndFunc ;==>GetURI Func GetURLs($url) $uri = GetURI($url) $file = "spider.html.txt" Status("Downloading", $url, 0) $filesize = InetGetSize($url) $lastsize = 0 $strikes = 0 InetGet($url, $file, 1, 1) While @InetGetActive If $lastsize == @InetGetBytesRead Then $strikes = $strikes + 1 If $strikes >= 30 Then ExitLoop $lastsize = @InetGetBytesRead Status("Downloading", $url, Round(($lastsize / $filesize) * 100)) Sleep(250) Wend $html = FileRead($file, FileGetSize($file)) For $line = 1 To 500 If StringInStr(FileReadLine($file, $line), "<title>") Then $Fchartid = StringRegExpReplace(FileReadLine($file, $line), "(\D.*|\d.*|)(<title>)=(\d*)(\D.*|\d.*|)", "$3") $Online = $line EndIf Next FileDelete($file) Status("Parsing URLs", $url, 0) $tags = _ArrayParse($html, "<a", ">") For $i = 0 To UBound($tags) - 1 Step 1 Status("Checking <A> Tags for URLs", $url, Round(($i / (UBound($tags) - 1)) * 100)) CheckURL($uri, $tags[$i], $url, $Fchartid) Next $tags = 
_ArrayParse($html, "<EMBED", ">") For $i = 0 To UBound($tags) - 1 Step 1 Status("Checking <EMBED> Tags for URLs", $url, Round(($i / (UBound($tags) - 1)) * 100)) CheckURL($uri, $tags[$i], $url, $Fchartid) Next EndFunc ;==>GetURLs Func CheckURL($uri, $str, $ref, $Fchartid) If StringInStr($str, "href=") Then $turl = GetAttr($str, "href=") If Not StringInStr(StringLeft($turl, 10), "://") Then If StringLeft($turl, 1) == "/" Then $turl = $uri & StringMid($turl, 2) Else $turl = $uri & $turl EndIf EndIf CheckType($turl, $ref, $Fchartid) EndIf If StringInStr($str, "src=") Then $turl = GetAttr($str, "src=") If Not StringInStr(StringLeft($turl, 10), "://") Then If StringLeft($turl, 1) == "/" Then $turl = $uri & StringMid($turl, 2) Else $turl = $uri & $turl EndIf EndIf CheckType($turl, $ref, $Fchartid) EndIf EndFunc ;==>CheckURL Func GetAttr($str, $attr) If StringInStr($str, $attr & '"') Then $temp = _ArrayParse($str, $attr & '"', '"') If UBound($temp) == 1 Then Return $temp[0] ElseIf StringInStr($str, $attr & "'") Then $temp = _ArrayParse($str, $attr & "'", "'") If UBound($temp) == 1 Then Return $temp[0] ElseIf StringInStr($str, $attr) Then $temp = StringMid($str, StringInStr($str, $attr) + StringLen($attr)) If StringInStr($temp, " ") Then $temp = StringMid($temp, 1, StringInStr($temp, " ") - 1) EndIf Return $temp EndIf EndFunc ;==>GetAttr Func CheckType($url, $ref, $Fchartid) If StringRight($url, 4) == ".jpg" Or _ StringRight($url, 4) == ".bmp" Then FileWriteLine("spider.images.log", $url & @TAB & $ref) $imagenum = $imagenum + 1 ElseIf StringRight($url, 4) == ".mp3" Or _ StringRight($url, 4) == ".rbs" Then FileWriteLine("spider.audio.log", $url & @TAB & $Fchartid) $audionum = $audionum + 1 AddURL(GetURI($url)) ElseIf StringRight($url, 4) == ".exe" Or _ StringRight($url, 4) == ".zip" Or _ StringRight($url, 4) == ".rar" Or _ StringRight($url, 4) == ".tar" Then ;Do Nothing Else AddURL($url) EndIf EndFunc ;==>CheckType It works fine for 5-10 url's after that i get new.au3 (198) 
: ==> Variable used without being declared.: CheckURL($uri, $tags[$i], $url, $Fchartid) CheckURL($uri, $tags[$i], $url, ^ ERROR Link to comment Share on other sites More sharing options...
BrewManNH Posted January 21, 2013 Share Posted January 21, 2013 You should post your new code, because you cannot be running the code in first post, it would error and not run.I ran the code, it doesn't error on those macros, it just sees them as being 0 so the While loop never runs. kenzu 1 If I posted any code, assume that code was written using the latest release version unless stated otherwise. Also, if it doesn't work on XP I can't help with that because I don't have access to XP, and I'm not going to.Give a programmer the correct code and he can do his work for a day. Teach a programmer to debug and he can do his work for a lifetime - by Chirag GudeHow to ask questions the smart way! I hereby grant any person the right to use any code I post, that I am the original author of, on the autoitscript.com forums, unless I've specifically stated otherwise in the code or the thread post. If you do use my code all I ask, as a courtesy, is to make note of where you got it from. Back up and restore Windows user files _Array.au3 - Modified array functions that include support for 2D arrays.  -  ColorChooser - An add-on for SciTE that pops up a color dialog so you can select and paste a color code into a script.  -  Customizable Splashscreen GUI w/Progress Bar - Create a custom "splash screen" GUI with a progress bar and custom label.  -  _FileGetProperty - Retrieve the properties of a file  -  SciTE Toolbar - A toolbar demo for use with the SciTE editor  -  GUIRegisterMsg demo - Demo script to show how to use the Windows messages to interact with controls and your GUI.  -   Latin Square password generator Link to comment Share on other sites More sharing options...
kenzu Posted January 21, 2013 Author Share Posted January 21, 2013 I ran the code, it doesn't error on those macros, it just sees them as being 0 so the While loop never runs. You just need to press the Start button 2-3 times so that it starts crawling. Link to comment Share on other sites More sharing options...
BrewManNH Posted January 21, 2013 Share Posted January 21, 2013 Doesn't matter how many times you press the button, the While loop will never run. This will ALWAYS evaluate to zero because the macro @InetGetActive doesn't exist any longer. If you run it and run Au3Check on it first, it will never run at all because that will cause it to fail. While @InetGetActive If I posted any code, assume that code was written using the latest release version unless stated otherwise. Also, if it doesn't work on XP I can't help with that because I don't have access to XP, and I'm not going to.Give a programmer the correct code and he can do his work for a day. Teach a programmer to debug and he can do his work for a lifetime - by Chirag GudeHow to ask questions the smart way! I hereby grant any person the right to use any code I post, that I am the original author of, on the autoitscript.com forums, unless I've specifically stated otherwise in the code or the thread post. If you do use my code all I ask, as a courtesy, is to make note of where you got it from. Back up and restore Windows user files _Array.au3 - Modified array functions that include support for 2D arrays.  -  ColorChooser - An add-on for SciTE that pops up a color dialog so you can select and paste a color code into a script.  -  Customizable Splashscreen GUI w/Progress Bar - Create a custom "splash screen" GUI with a progress bar and custom label.  -  _FileGetProperty - Retrieve the properties of a file  -  SciTE Toolbar - A toolbar demo for use with the SciTE editor  -  GUIRegisterMsg demo - Demo script to show how to use the Windows messages to interact with controls and your GUI.  -   Latin Square password generator Link to comment Share on other sites More sharing options...
JohnOne Posted January 21, 2013 Share Posted January 21, 2013 Look at some other Inet native functions InetInfo InetGetSize etc... I'm just off to bang my head against a brick wall. AutoIt Absolute Beginners  Require a serial  Pause Script  Video Tutorials by Morthawt  ipify Monkey's are, like, natures humans. Link to comment Share on other sites More sharing options...
guinness Posted January 21, 2013 Share Posted January 21, 2013 kenzu, Sorry to come in at short notice, but why the reluctance to use V3.3.8.1? UDF List:  _AdapterConnections() • _AlwaysRun() • _AppMon() • _AppMonEx() • _ArrayFilter/_ArrayReduce • _BinaryBin() • _CheckMsgBox() • _CmdLineRaw() • _ContextMenu() • _ConvertLHWebColor()/_ConvertSHWebColor() • _DesktopDimensions() • _DisplayPassword() • _DotNet_Load()/_DotNet_Unload() • _Fibonacci() • _FileCompare() • _FileCompareContents() • _FileNameByHandle() • _FilePrefix/SRE() • _FindInFile() • _GetBackgroundColor()/_SetBackgroundColor() • _GetConrolID() • _GetCtrlClass() • _GetDirectoryFormat() • _GetDriveMediaType() • _GetFilename()/_GetFilenameExt() • _GetHardwareID() • _GetIP() • _GetIP_Country() • _GetOSLanguage() • _GetSavedSource() • _GetStringSize() • _GetSystemPaths() • _GetURLImage() • _GIFImage() • _GoogleWeather() • _GUICtrlCreateGroup() • _GUICtrlListBox_CreateArray() • _GUICtrlListView_CreateArray() • _GUICtrlListView_SaveCSV() • _GUICtrlListView_SaveHTML() • _GUICtrlListView_SaveTxt() • _GUICtrlListView_SaveXML() • _GUICtrlMenu_Recent() • _GUICtrlMenu_SetItemImage() • _GUICtrlTreeView_CreateArray() • _GUIDisable() • _GUIImageList_SetIconFromHandle() • _GUIRegisterMsg() • _GUISetIcon() • _Icon_Clear()/_Icon_Set() • _IdleTime() • _InetGet() • _InetGetGUI() • _InetGetProgress() • _IPDetails() • _IsFileOlder() • _IsGUID() • _IsHex() • _IsPalindrome() • _IsRegKey() • _IsStringRegExp() • _IsSystemDrive() • _IsUPX() • _IsValidType() • _IsWebColor() • _Language() • _Log() • _MicrosoftInternetConnectivity() • _MSDNDataType() • _PathFull/GetRelative/Split() • _PathSplitEx() • _PrintFromArray() • _ProgressSetMarquee() • _ReDim() • _RockPaperScissors()/_RockPaperScissorsLizardSpock() • _ScrollingCredits • _SelfDelete() • _SelfRename() • _SelfUpdate() • _SendTo() • _ShellAll() • _ShellFile() • _ShellFolder() • _SingletonHWID() • _SingletonPID() • _Startup() • _StringCompact() • _StringIsValid() • 
_StringRegExpMetaCharacters() • _StringReplaceWholeWord() • _StringStripChars() • _Temperature() • _TrialPeriod() • _UKToUSDate()/_USToUKDate() • _WinAPI_Create_CTL_CODE() • _WinAPI_CreateGUID() • _WMIDateStringToDate()/_DateToWMIDateString() • Au3 script parsing • AutoIt Search • AutoIt3 Portable • AutoIt3WrapperToPragma • AutoItWinGetTitle()/AutoItWinSetTitle() • Coding • DirToHTML5 • FileInstallr • FileReadLastChars() • GeoIP database • GUI - Only Close Button • GUI Examples • GUICtrlDeleteImage() • GUICtrlGetBkColor() • GUICtrlGetStyle() • GUIEvents • GUIGetBkColor() • Int_Parse() & Int_TryParse() • IsISBN() • LockFile() • Mapping CtrlIDs • OOP in AutoIt • ParseHeadersToSciTE() • PasswordValid • PasteBin • Posts Per Day • PreExpand • Protect Globals • Queue() • Resource Update • ResourcesEx • SciTE Jump • Settings INI • SHELLHOOK • Shunting-Yard • Signature Creator • Stack() • Stopwatch() • StringAddLF()/StringStripLF() • StringEOLToCRLF() • VSCROLL • WM_COPYDATA • More Examples... Updated: 22/04/2018 Link to comment Share on other sites More sharing options...
kenzu Posted January 21, 2013 Author Share Posted January 21, 2013 I am a beginner at AutoIt and programming. Link to comment Share on other sites More sharing options...
guinness Posted January 21, 2013 Share Posted January 21, 2013 Whoops, missed your last post about your version. Anyway, JohnOne mentioned what to do already, go to the help file, also look in my signature for Inet. UDF List:  _AdapterConnections() • _AlwaysRun() • _AppMon() • _AppMonEx() • _ArrayFilter/_ArrayReduce • _BinaryBin() • _CheckMsgBox() • _CmdLineRaw() • _ContextMenu() • _ConvertLHWebColor()/_ConvertSHWebColor() • _DesktopDimensions() • _DisplayPassword() • _DotNet_Load()/_DotNet_Unload() • _Fibonacci() • _FileCompare() • _FileCompareContents() • _FileNameByHandle() • _FilePrefix/SRE() • _FindInFile() • _GetBackgroundColor()/_SetBackgroundColor() • _GetConrolID() • _GetCtrlClass() • _GetDirectoryFormat() • _GetDriveMediaType() • _GetFilename()/_GetFilenameExt() • _GetHardwareID() • _GetIP() • _GetIP_Country() • _GetOSLanguage() • _GetSavedSource() • _GetStringSize() • _GetSystemPaths() • _GetURLImage() • _GIFImage() • _GoogleWeather() • _GUICtrlCreateGroup() • _GUICtrlListBox_CreateArray() • _GUICtrlListView_CreateArray() • _GUICtrlListView_SaveCSV() • _GUICtrlListView_SaveHTML() • _GUICtrlListView_SaveTxt() • _GUICtrlListView_SaveXML() • _GUICtrlMenu_Recent() • _GUICtrlMenu_SetItemImage() • _GUICtrlTreeView_CreateArray() • _GUIDisable() • _GUIImageList_SetIconFromHandle() • _GUIRegisterMsg() • _GUISetIcon() • _Icon_Clear()/_Icon_Set() • _IdleTime() • _InetGet() • _InetGetGUI() • _InetGetProgress() • _IPDetails() • _IsFileOlder() • _IsGUID() • _IsHex() • _IsPalindrome() • _IsRegKey() • _IsStringRegExp() • _IsSystemDrive() • _IsUPX() • _IsValidType() • _IsWebColor() • _Language() • _Log() • _MicrosoftInternetConnectivity() • _MSDNDataType() • _PathFull/GetRelative/Split() • _PathSplitEx() • _PrintFromArray() • _ProgressSetMarquee() • _ReDim() • _RockPaperScissors()/_RockPaperScissorsLizardSpock() • _ScrollingCredits • _SelfDelete() • _SelfRename() • _SelfUpdate() • _SendTo() • _ShellAll() • _ShellFile() • _ShellFolder() • _SingletonHWID() • 
_SingletonPID() • _Startup() • _StringCompact() • _StringIsValid() • _StringRegExpMetaCharacters() • _StringReplaceWholeWord() • _StringStripChars() • _Temperature() • _TrialPeriod() • _UKToUSDate()/_USToUKDate() • _WinAPI_Create_CTL_CODE() • _WinAPI_CreateGUID() • _WMIDateStringToDate()/_DateToWMIDateString() • Au3 script parsing • AutoIt Search • AutoIt3 Portable • AutoIt3WrapperToPragma • AutoItWinGetTitle()/AutoItWinSetTitle() • Coding • DirToHTML5 • FileInstallr • FileReadLastChars() • GeoIP database • GUI - Only Close Button • GUI Examples • GUICtrlDeleteImage() • GUICtrlGetBkColor() • GUICtrlGetStyle() • GUIEvents • GUIGetBkColor() • Int_Parse() & Int_TryParse() • IsISBN() • LockFile() • Mapping CtrlIDs • OOP in AutoIt • ParseHeadersToSciTE() • PasswordValid • PasteBin • Posts Per Day • PreExpand • Protect Globals • Queue() • Resource Update • ResourcesEx • SciTE Jump • Settings INI • SHELLHOOK • Shunting-Yard • Signature Creator • Stack() • Stopwatch() • StringAddLF()/StringStripLF() • StringEOLToCRLF() • VSCROLL • WM_COPYDATA • More Examples... Updated: 22/04/2018 Link to comment Share on other sites More sharing options...
Recommended Posts
Create an account or sign in to comment
You need to be a member in order to leave a comment
Create an account
Sign up for a new account in our community. It's easy!
Register a new accountSign in
Already have an account? Sign in here.
Sign In Now