Brent Dodson

Web Programming Page

eightball Register
Unregistered users: click here to register. Registered users can post their website here.
eightball Science Humor:
The Man Who Almost Invented The Vacuum Cleaner
The man officially credited with inventing the vacuum cleaner is
Hubert Cecil Booth. However, he got the idea from a man who almost
invented it.
In 1901 Booth visited a London music-hall. On the bill was an
American inventor with his wonder machine for removing dust from carpets.
The machine comprised a box about one foot square with a bag on top.
After watching the act -- which made everyone in the front six rows sneeze
-- Booth went round to the inventor's dressing room.
"It should suck not blow," said Booth, coming straight to the
point. "Suck?", exclaimed the enraged inventor. "Your machine just moves
the dust around the room," Booth informed him. "Suck? Suck? Sucking is
not possible," was the inventor's reply and he stormed out. Booth proved
that it was by the simple expedient of kneeling down, pursing his lips and
sucking the back of an armchair. "I almost choked," he said afterwards.
-- Stephen Pile, "The Book of Heroic Failures"
eightball Chuck Norris Humor:
Chuck Norris does not hunt because the word hunting infers the probability of failure. Chuck Norris goes killing.
eightball Jobs:
Need a Job?
Check these out!!!!!
Jobs available
Check if a URL exists -
Source: Ruby remote file checker[dzone.com]



require 'open-uri'
require 'net/http'

# Reports whether +url+ answers an HTTP HEAD request with status 200.
#
# url - absolute http:// or https:// URL given as a String.
#
# Returns true only when the response code is exactly "200". Returns false for
# every other status code, for URLs that are malformed or not HTTP(S), and when
# the host cannot be resolved or reached.
def remote_file_exists?(url)
  uri = URI.parse(url)
  # Anything that is not http/https (or has no host) cannot be probed with HEAD.
  return false unless uri.is_a?(URI::HTTP) && uri.host

  http = Net::HTTP.new(uri.host, uri.port)
  # Without TLS an https:// URL would be addressed in plain HTTP on port 443.
  http.use_ssl = (uri.scheme == 'https')
  http.start { |conn| conn.head(uri.request_uri).code == "200" }
rescue URI::InvalidURIError, SocketError, SystemCallError, Timeout::Error, IOError
  # DNS failure, refused connection, timeout or unparsable URL: treat as "not there".
  false
end



# Example calls. The result noted after each call is presumably what the author
# observed against the live sites; it depends on network state and on the
# current content of those sites.
remote_file_exists? 'http://www.www.www' #=>false
remote_file_exists? 'http://www.wired.com/' #=>true
remote_file_exists? 'http://www.wired.com/blogs/' #=>true
remote_file_exists? 'http://www.wired.com/i-love-ruby/' #=>false
A python tagger app -
A Tagger application, this is a Python app to generate an index of links organized by words that occur in their title strings, (done to self-teach python)
Input: is file containing a list of links
Outputs: links indexed by words that occur in their titles, output dumped to console.



from BeautifulSoup import BeautifulSoup,SoupStrainer
from urllib2 import urlopen, URLError
from pysqlite2 import dbapi2 as sqlite
from sets import Set
import re

# HTML parser class
class MyHTMLParser():

titledata = ''

# browse to specified page &extract title string
# borrowed exception handling code from:
# http://www.voidspace.org.uk/python/articles/urllib2.shtml
def visitURL(self,url):
self.titledata = ''
try:
req = urlopen(url)
except URLError, e:
if hasattr(e, 'reason'):
print 'We failed to reach a server.'
print 'Reason: ', e.reason
elif hasattr(e, 'code'):
print 'The server couldn\'t fulfill the request.'
print 'Error code: ', e.code
else:
try:
title = SoupStrainer('title')
for tag in BeautifulSoup(req, parseOnlyThese=title):
s = tag.string.strip().lower()
self.titledata += s
except:
print "Error: HTML parse error"

# get title string
def getTitleData(self):
return self.titledata



# Titleword2URL map
class TitleWords2LinkMap:
    """Index that maps a title word to the list of URLs stored under it."""

    def __init__(self):
        # word -> list of URLs. Created per instance so that two maps never
        # share (and silently pollute) one dictionary.
        self.word2URLmap = {}

    def storeData(self, word, url):
        """Append `url` to the list kept for `word`, creating the list on first use."""
        self.word2URLmap.setdefault(word, []).append(url)

    def printDirectory(self):
        """Print every word with its URL list as a blank line plus 'word ===> [urls]'."""
        for key in self.word2URLmap.keys():
            # Single-argument print gives identical output on Python 2 and 3.
            print("\n%s ===> %s" % (key, self.word2URLmap[key]))





# Map URLs to title strings
class Link2TitleMap:
    """Collect links together with their titles and reduce the titles to tag words.

    Input is fed line by line through storeData(): a line that starts with
    'http' is a link, any other line becomes the categorizing title for the
    links that follow it.
    """

    # remove these words from the title, these are not to be indexed
    common_words = frozenset(['in', 'the', 'of', 'it', 'on', 'a', 'an', 'with',
                              'to', 'for', 'you', 'your', 'my', 'mine'])

    def __init__(self):
        # categorizing title that applies to the links stored next
        self.title = ''
        # url -> list of title strings (a set of words after sanitizeTitle())
        self.weblinks = {}
        # HTML parser object used to fetch page titles
        self.htmlparser = MyHTMLParser()

    # remove commonly occurring words in titles
    def sanitizeTitle(self):
        """Replace each URL's title list by the set of its purely alphabetic,
        non-common words."""
        for url in list(self.weblinks.keys()):
            words = set()
            # a title is made up of a set of words...
            for title in self.weblinks[url]:
                for word in title.split():
                    if word.isalpha():
                        words.add(word)
            # ...with common words removed
            self.weblinks[url] = words.difference(self.common_words)

    # store incoming input string into link hash
    def storeData(self, data):
        """Record `data`: a link is filed under the current title (lower-cased),
        anything else becomes the current title."""
        if data.startswith('http'):
            self.weblinks.setdefault(data, []).append(self.title.lower())
        else:
            self.title = data

    # print out contents of link2title map
    def printDirectory(self):
        """Print every URL with its stored titles as a blank line plus 'url ---> value'."""
        for key in self.weblinks.keys():
            print("\n%s ---> %s" % (key, self.weblinks[key]))

    def printTitle(self):
        """Visit every stored URL and print 'url ---> page title'."""
        for url in self.weblinks.keys():
            self.htmlparser.visitURL(url)
            print("%s ---> %s" % (url, self.htmlparser.getTitleData()))

    # iterate thru the link hash, visiting each URL pointed
    # to by the keys & extract the title of each webpage
    def getPageTitle(self):
        """Append the fetched page title of every URL to that URL's title list.

        Must run before sanitizeTitle(), which turns the lists into sets.
        """
        for url in list(self.weblinks.keys()):
            self.htmlparser.visitURL(url)
            self.weblinks[url].append(self.htmlparser.getTitleData())

    # return the URL2titlewordmap
    def getMap(self):
        """Return the url -> titles/words dictionary itself (not a copy)."""
        return self.weblinks






##############################################
# main program
#
# Reads 'Weblinks.txt' (lines starting with 'http' are links, every other
# non-blank line is the title for the links that follow), fetches each page
# title, strips common words and prints a word -> URLs index to the console.

# open file for reading
infile = open('Weblinks.txt', 'r')

# create a new LinkDirectory object
linkdir = Link2TitleMap()

title2url = TitleWords2LinkMap()

# read in file having the links (line by line - for efficiency)
# internalize file data into linkdir object
try:
    for line in infile:
        line = line.strip()
        # skip blank lines so they do not reset the current title to ''
        if line:
            linkdir.storeData(line)
finally:
    # close file handle, also when storing a line fails
    infile.close()

# populate the link hash by visiting the links (keys)
# and extracting the title
linkdir.getPageTitle()

# remove common words from title (we don't index these)
linkdir.sanitizeTitle()

# generate tag -> URL mapping
url2words = linkdir.getMap()
for url in url2words.keys():
    for word in url2words[url]:
        title2url.storeData(word, url)

# print out tag -> URL mapping to console
title2url.printDirectory()

many mechanize examples -
This list is based on my (German) article: "Web scraping mit Ruby/Mechanize" http://sixserv.org/2009/05/27/webscripting-mit-ruby-und-mechanize/

#00 Initialization
# Creates the `agent` object that all following examples reuse.


require 'rubygems'
require 'mechanize'
agent = WWW::Mechanize.new
# Send requests through the proxy at localhost:8000.
agent.set_proxy('localhost', '8000')
# Two ways to set the User-Agent: a free-form string or a named alias.
# NOTE(review): both are assigned here; presumably the later alias wins -- confirm.
agent.user_agent = 'Individueller User-Agent'
agent.user_agent_alias = 'Linux Mozilla'
agent.open_timeout = 3
agent.read_timeout = 4
agent.keep_alive = false

agent.max_history = 0 # reduce memory if you make lots of requests


#01 manual get requests
# Fetch a page, without and with query parameters given as a hash.


url = 'http://apoc.sixserv.org/requestinfo/'
page = agent.get url
# or ...
page = agent.get(url, {"name" =>"value", "key" =>"val"})


#02 manual post submits
# POST the given hash of form fields to the URL.


url = 'http://apoc.sixserv.org/requestinfo/'
page = agent.post(url, {"name" =>"value", "key" =>"val"})


#03 form post submits
# Locate a form by its action URL, fill in two fields and submit it.
# '[Username]' and '[Password]' are placeholders to be replaced by real values.


page = agent.get 'https://twitter.com/login'
login_form = page.form_with(:action =>'https://twitter.com/sessions')
login_form['session[username_or_email]'] = '[Username]'
login_form['session[password]'] = '[Password]'
page = agent.submit login_form


#04 link and history navigation
# Follow links selected by text or by href, then step back in the history.


page = agent.get 'http://www.heise.de/'
page = agent.click(page.link_with(:text =>/Telepolis/))
page = agent.click(page.link_with(:href =>/artikel/))
# The return value of `back` is discarded, so `page` below still refers to
# the page returned by the last click.
agent.back
agent.back
puts page.body


#05 exceptions
# Requesting a non-existing page and handling the resulting error.


begin
  page = agent.get 'http://apoc.sixserv.org/diese/seite/gibt/es/nicht/'
rescue WWW::Mechanize::ResponseCodeError
  # $! holds the exception that was just rescued.
  puts "ResponseCodeError - Code: #{$!}"
end


#06 referer
# Pass the URL and a custom Referer as a hash of options.


page = agent.get(:url =>'http://apoc.sixserv.org/requestinfo/',
                 :referer =>'http://google.com/this/is/a/custom/referer')
puts page.body


#07 request header manipulation
# Register a hook that runs before each request is sent and adds a custom
# header to it. The hook receives a params hash whose :request entry is the
# outgoing request object.


agent.pre_connect_hooks << lambda do |params|
  params[:request]['X-Requested-With'] = 'XMLHttpRequest'
end


#08 response header
# Issue a HEAD request and read individual response headers by name.


page = agent.head 'http://sixserv.org'
server_version = page.header['server']
puts "Server: #{server_version}"
# The header is optional, so only print it when the server sent one.
if page.header.key? 'x-powered-by'
  php_version = page.header['x-powered-by']
  puts "X-Powered-By: #{php_version}"
end
# redirection urls:
# With redirect_ok switched off the printed 'location' header is that of the
# redirect response itself.
agent.redirect_ok = false
page = agent.get 'http://www.sixserv.org/'
puts page.header['location']


#09 content parsing
# Two ways to pull data out of a fetched page.


# XPath / CSS selector: search returns the matching node(s).
page = agent.get 'http://xkcd.com/'
img = page.search '/html/body/div/div[2]/div/div[2]/div/div/img'
puts img
# Regular expression: capture the text of the first <h3> element.
page = agent.get 'http://example.com/'
# Parentheses avoid Ruby's "ambiguous first argument" warning for a bare regex.
page.body.match(/<h3>([^<]+)<\/h3>/)
# $1 is the first capture group of the last successful match (nil if none).
puts "Heading 3: #{$1}"


#10 "with" method examples
# Assumes `page` is a page fetched in one of the earlier examples.


# *_with: form, link, base, frame or iframe

# get the first link including "foo" inside url:
page.link_with(:href =>/foo/)

# all links with text 'more'
page.links_with(:text =>'more')

# get the form with the name 'foo'
page.form_with('foo') # or form_with(:name =>'foo')

Webserver in Perl -

#!/usr/bin/perl -w
#
# Simple server using IO::Socket
# John Harrison - 16 Nov 2009
#
use strict;
use IO::Socket;
use CGI;

# Line-oriented TCP server: accepts one client at a time, reads its input line
# by line and answers a few recognised requests; everything else is logged.

my $port = 6463;
my $host = 'HOSTNAME';   # placeholder: replace with the address to listen on

my $sock = new IO::Socket::INET(
    LocalHost =>$host,
    LocalPort =>$port,
    Proto =>'tcp',
    Listen =>SOMAXCONN,
    Reuse =>1
);
# Script name without its directory part, used as prefix for log messages.
(my $my_name = $0) =~ s{.*/}{}g;

$sock or die "$my_name: Failed to create socket :$!\n";

my($new_sock, $c_addr); # , $buf);

# Request headers that are dropped silently instead of being logged.
my @ignore = (
    'Host',
    'User-Agent', 'Accept', 'Accept-Language',
    'Accept-Encoding', 'Accept-Charset',
    'Keep-Alive', 'Connection', 'Cache-Control'
);

print "Starting Server...\n";
# Outer loop: one iteration per accepted client connection.
SOCK: while (($new_sock, $c_addr) = $sock->accept()) {

    my ($client_port, $c_ip) = sockaddr_in($c_addr);
    my $client_ip = inet_ntoa($c_ip);
    # my $client_host = gethostbyaddr($c_ip, AF_INET);

    print "$my_name: Connect from: [$client_ip]\n";

    # Inner loop: one iteration per line received from the current client.
    LINE: while (defined (my $buf = <$new_sock>)) {

        # Strip the line terminator (LF, and CR if the client sent CRLF).
        chomp $buf;
        $buf =~ s/\r$//;

        if ( $buf =~ m{^GET /host/([^/\s]+)/dc HTTP/1.1} ) {

            # NOTE(review): $q is only used by the commented-out header line below.
            my $q = CGI->new;

            # print $new_sock $q->header( -type =>'text/plain' );

            # $1 is the host name captured from the request path.
            print $new_sock "QUERY HOST: $1\n";

        }
        elsif ( $buf =~ m{help}i ) {

            # NOTE(review): the usage text has an empty path segment between
            # "/host/" and "/dc"; presumably a placeholder was lost -- confirm.
            print $new_sock "\nUsage: /host//dc\n";

        }
        elsif ( $buf =~ /QUIT/ ) {

            print $new_sock "bye...\n";
            close($new_sock);
            next SOCK;

        }
        elsif ( $buf eq "" ) {

            # An empty line ends the request: hang up and wait for the next client.
            close($new_sock);
            next SOCK;

        }
        else {

            # Skip the headers listed in @ignore, log every other line.
            for my $header ( @ignore ) {
                next LINE if $buf =~ /^${header}:/;
            }
            print "[$buf]\n";

        }

    }

}


Feedburner Awareness API Script -
Look up FeedBurner statistics for a FeedBurner RSS feed. Get the reach, circulation and hits, including historical numbers.


#!/usr/bin/env python
# -*- coding: utf-8 -*-
# (C) 2009 HalOtis Marketing
# written by Matt Warren
# http://halotis.com/

import urllib2
try:
from xml.etree import ElementTree
except ImportError:
from elementtree import ElementTree

#add a dates=YYYY-MM-DD,YYYY-MM-DD argument to the url to get all data in a date range
url_prefix = 'https://feedburner.google.com/api/awareness/1.0/GetFeedData?uri='

URIs = ['HalotisBlog',]

def print_feedburner(content):
    """Print the statistics contained in an Awareness API XML response.

    `content` is the XML document as a string. For every <feed> element
    directly below the root its `uri` attribute is printed, followed by one
    line per <entry> child: 'date - reach - circulation - hits'. A missing
    attribute is printed as None.
    """
    tree = ElementTree.fromstring(content)
    for feed in tree.findall('feed'):
        # Single-argument print gives identical output on Python 2 and 3.
        print("%s :" % feed.get('uri'))
        for entry in feed.findall('entry'):
            print("%s - %s - %s - %s" % (entry.get('date'), entry.get('reach'),
                                         entry.get('circulation'), entry.get('hits')))


if __name__=='__main__':

    # Fetch and print the statistics of every configured feed in turn.
    for uri in URIs:
        response = urllib2.urlopen(url_prefix + uri)
        try:
            content = response.read()
        finally:
            # Release the connection even when reading fails part-way.
            response.close()
        print_feedburner(content)
Add a friend's calendar on Google Calendar -
This code shows how to add a friend's calendar to our Google Calendar account.


require 'rubygems'
require 'mechanize'

# Log in to a Google account, read the `secid` token from the calendar page's
# cookie and post a request that adds another calendar to the account.

agent = WWW::Mechanize.new
page = agent.get('https://www.google.com/accounts/ServiceLogin')
# The login form is taken to be the first form on the page.
form = page.forms.first
# Placeholder credentials: replace with the real account before running.
form.Email = "user@gmail.com"
form.Passwd = "mypassword"
page2 = agent.submit(form)
page3 = agent.get("http://www.google.com/calendar")
# Capture the secid value from the set-cookie header.
# NOTE(review): `.*` is greedy and `match` returns nil when nothing matches,
# which would raise on `[1]` -- confirm against a real response.
secid = page3.header['set-cookie'].match(/secid=(.*),/)[1]

# Form fields of the "add calendar" request; "cid" is the id of the calendar
# to add (placeholder value here).
data = {
  "cid" =>"23489dfskj12342gh34kj32434@group.calendar.google.com",
  "hl" =>"en",
  "lact" =>"ADD",
  "ltyp" =>"0",
  "secid" =>secid,
}
agent.post("http://www.google.com/calendar/editcallist", data)
Enabling GZIP Compression -
Add to your .htaccess

# BEGIN GZIP
# Compress text-based responses with mod_deflate. The <IfModule> guard makes
# Apache skip the directive when mod_deflate is not loaded; without it the
# unknown directive in .htaccess would make every request fail with a 500 error.
<IfModule mod_deflate.c>
AddOutputFilterByType DEFLATE text/text text/html text/plain text/xml text/css application/x-javascript application/javascript
</IfModule>
# END GZIP
UUID, GUID Generation from within Ruby -
A UUID (Universally Unique Identifier), also known as a GUID, is often used in software to uniquely identify information. Originally designed for distributed systems, UUIDs have quickly found their way to wherever non-conflicting identifiers are needed.
The following snippet implements simple cross-platform UUID generation based on available web-services, plus an OS-specific (win32) version.

Please follow the discussion at http://cheind.blogspot.com/2008/12/uuids-for-masses-in-ruby.html for advanced examples and details. In order to run the code you'll need some small extensions to ruby core classes. Please find them at http://cheind.blogspot.com/2008/12/git-repository-for-blog-code.html


#
# Project:: Ruby-Snippets
#
# Author:: Christoph Heindl (mailto:christoph.heindl@gmail.com)
# Homepage:: http://cheind.blogspot.com
#
# == Overview
#
# Implements generation of UUIDs based on
# - A web-service UUID.from_url
# - Platform dependant API's UUID.from_os
#
# Currently only windows platforms are supported

require 'net/http'
require 'uri/http'

# Include extensions, http://cheind.blogspot.com/2008/12/git-repository-for-blog-code.html
require 'ext/reverse_merge.rb'
require 'ext/join.rb'
require 'ext/breakup.rb'

# Provides generation of universal unique identifiers (UUID) or more
# commonly known as GUIDs.
class UUID

  # Generates an UUID string by using
  # the web-services provided by http://www.fileformat.info
  # at http://www.fileformat.info/tool/guid.htm
  #
  # Options
  # - :format     should always be :text
  # - :uppercase  sets all letters to uppercase if set
  # - :hyphen     uses '-' between groups if set
  # - :bracket    wraps the UUID in curly brackets if set
  # - :count      number of UUIDs to receive
  #
  #   UUID.from_url # =>"{F4C77E3A-F1C3-45BD-B740-7DD61B889AD9}"
  #
  #   UUID.from_url(:count =>2)
  #   # =>["{44351F05-8CCC-4408-9FE0-CE41864F03CE}",
  #        "{3730EA6C-986B-4A4A-A942-ED4C1192D713}"]
  #
  def UUID.from_url(options={})
    options = UUID.default_options(options)
    # Replace all instances of TrueClass with 'Y' for the web-service
    options.each { |k,v| options[k] = 'Y' if v.instance_of?(TrueClass) }
    # Build URI; Hash#join(pair_sep, key_value_sep) comes from ext/join.rb
    uri = URI::HTTP.build(
      :host =>'www.fileformat.info',
      :path =>'/tool/guid.htm',
      :query =>options.join('&', '=')
    )
    # Query
    uuids = Net::HTTP.get(uri)
    # Split the response into lines (one UUID per line). Array#breakup comes
    # from ext/breakup.rb; presumably it unwraps a one-element array.
    uuids.split($/).breakup
  end

  # Following block provides platform dependent UUID generation
  if RUBY_PLATFORM =~ /mswin/
    # Windows platform...
    require 'Win32API'

    @@api = Win32API.new('rpcrt4', 'UuidCreate', 'P', 'L')

    # Generates an UUID string using Win32API.
    #
    # This is based on code from Brad Wilson posted in 2005 at
    # http://www.agileprogrammer.com/dotnetguy/archive/2005/10/27/8991.aspx
    #
    # Options
    # - :uppercase  sets all letters to uppercase if set, to lowercase otherwise
    # - :hyphen     uses '-' between groups if set
    # - :bracket    wraps the UUID in curly brackets if set
    # - :count      number of UUIDs to generate
    #
    #   UUID.from_os # =>"{F4C77E3A-F1C3-45BD-B740-7DD61B889AD9}"
    #
    #   UUID.from_os(:count =>2)
    #   # =>["{44351F05-8CCC-4408-9FE0-CE41864F03CE}",
    #        "{3730EA6C-986B-4A4A-A942-ED4C1192D713}"]
    #
    def UUID.from_os(options={})
      options = UUID.default_options(options)
      # Setup format string based on options: :bracket controls the braces,
      # :hyphen controls the group separators.
      format = options[:bracket] ? '{' : ''
      format += options[:hyphen] ?
        '%04X%04X-%04X-%04X-%04X-%04X%04X%04X' :
        '%04X%04X%04X%04X%04X%04X%04X%04X'
      format += options[:bracket] ? '}' : ''
      # Invoke API
      uuids = []
      options[:count].times do
        # 16-byte buffer that UuidCreate fills in
        buffer = ' ' * 16
        @@api.call(buffer)
        # Read the buffer as eight unsigned 16-bit values.
        # NOTE(review): this ignores the field layout of the native UUID
        # struct, so the textual form may differ from other tools -- confirm.
        a, b, c, d, e, f, g, h = buffer.unpack('SSSSSSSS')
        uuid = sprintf(format, a, b, c, d, e, f, g, h)
        # %X always yields uppercase digits, so lowercase explicitly when
        # :uppercase is switched off.
        uuid = options[:uppercase] ? uuid.upcase : uuid.downcase
        uuids << uuid
      end
      uuids.breakup
    end
  end

  # NOTE: `private` does not affect methods defined with an explicit receiver
  # (def UUID.x), so default_options remains callable from outside.
  private

  # Plugin default options if not already specified in options hash.
  # Raises a RuntimeError when :count is explicitly 0.
  # Hash#reverse_merge comes from ext/reverse_merge.rb.
  def UUID.default_options(options)
    raise "Generating zero UUIDs makes really no sense..." if options[:count] == 0
    options.reverse_merge(
      :format =>:text,
      :uppercase =>true,
      :hyphen =>true,
      :bracket =>true,
      :count =>1
    )
  end
end
Check Web Connection -
// Checks access to a web connection by looking for a file on the web server.
// Changing the text in that file allows manually turning off merge synchronization remotely.


''' <summary>
''' Checks access to the web server by requesting a marker file and comparing
''' its first line with "Success".
''' </summary>
''' <returns>
''' True when the first line of the marker file is exactly "Success"; False when
''' My.Settings.Online is off, when the content differs, or when any error
''' occurs (the error is passed to ThrowError first).
''' </returns>
Public Function CheckWebConnection() As Boolean
    Dim URL As String = "https://data.anonymous.com/publication/index.htm"

    ' Locally switched offline: do not touch the network at all.
    If My.Settings.Online = False Then
        Return False
    End If

    Try
        ' Explicit cast: WebRequest.Create returns the base type WebRequest.
        Dim request As HttpWebRequest = CType(WebRequest.Create(URL), HttpWebRequest)
        ' NOTE(review): credentials are hard-coded placeholders; move them to
        ' configuration before deploying.
        request.Credentials = New NetworkCredential("WebUSerName", "WebUserPass")
        If My.Settings.HasProxy Then
            Dim ProxyCreds As New NetworkCredential(My.Settings.ProxyUsername, My.Settings.ProxyPassword)
            Dim prxy As New WebProxy(My.Settings.ProxyAddress & ":" & My.Settings.ProxyPort)
            prxy.Credentials = ProxyCreds
            request.Proxy = prxy
        End If

        ' Using blocks release the connection and the stream on every path,
        ' including exceptions; otherwise connections leak on repeated calls.
        Using response As HttpWebResponse = CType(request.GetResponse(), HttpWebResponse)
            Using reader As New StreamReader(response.GetResponseStream())
                ' Only the first line is inspected. ReadLine returns Nothing for
                ' an empty body, which compares unequal to "Success".
                Dim firstLine As String = reader.ReadLine()
                Return firstLine = "Success"
            End Using
        End Using
    Catch ex As Exception
        ThrowError(ex)
        Return False
    End Try
End Function
Create a Merge Replication -
// Create a web merge synchronization agent on a local SQL Express machine to merge over the web to a central datasource.

' Creates a merge pull subscription that synchronizes over the web, then runs
' its synchronization agent once. Any exception is passed to ThrowError; the
' connection is disconnected in every case.
' Reads conn, publisherName, publicationName, publicationDbName, webSyncUrl,
' subscriptionDbName, subscriberName, winLogin, winPassword, GlobalBranchID and
' mPath, none of which are declared in this procedure.
Public Sub MergeCreate()
    'Create the objects that we need.'
    Dim subscription As MergePullSubscription

    Try
        'Connect to the Subscriber.'
        conn.Connect()
        'Define the pull subscription.'
        subscription = New MergePullSubscription()
        subscription.ConnectionContext = conn
        subscription.PublisherName = publisherName
        subscription.PublicationName = publicationName
        subscription.PublicationDBName = publicationDbName

        ' Web synchronization settings.
        ' NOTE(review): login and password are hard-coded placeholders.
        subscription.UseWebSynchronization = True
        subscription.InternetUrl = webSyncUrl
        subscription.InternetSecurityMode = AuthenticationMethod.BasicAuthentication
        subscription.InternetLogin = "WebUser"
        subscription.InternetPassword = "WebPass"

        subscription.CreateSyncAgentByDefault = True

        ' Local subscriber database and the accounts used to reach it
        ' (placeholder credentials).
        subscription.DatabaseName = subscriptionDbName
        subscription.SubscriberType = MergeSubscriberType.Anonymous
        subscription.SubscriberSecurity.WindowsAuthentication = False
        subscription.SubscriberSecurity.SqlStandardLogin = "localusername"
        subscription.SubscriberSecurity.SqlStandardPassword = "localpassword"
        subscription.SynchronizationAgentProcessSecurity.Login = "Standard"
        subscription.SynchronizationAgentProcessSecurity.Password = "xxxxxxxx"
        subscription.HostName = GlobalBranchID
        Try
            subscription.Create()
        Catch ex As Exception
            ' Wrap the failure in a clearer message; the original exception is
            ' kept as the inner exception.
            Throw New ApplicationException("The subscription could not be " + _
                "created. Verify that the subscription has " + _
                "been defined correctly.", ex)
        End Try
        Dim agent As MergeSynchronizationAgent
        ' Reload the subscription that was just created to obtain its agent.
        If subscription.LoadProperties() Then
            agent = subscription.SynchronizationAgent
            ' Fill in the agent's connection settings only when the publisher
            ' security mode compares equal to Nothing.
            If agent.PublisherSecurityMode = Nothing Then
                agent.PublisherSecurityMode = SecurityMode.Integrated
                agent.Distributor = "Distributor_Name"
                agent.DistributorSecurityMode = SecurityMode.Integrated
                agent.UseWebSynchronization = True
                agent.InternetUrl = webSyncUrl
                agent.InternetSecurityMode = SecurityMode.Standard
                agent.InternetLogin = winLogin
                agent.InternetPassword = winPassword
            End If
            If My.Settings.HasProxy Then
                agent.InternetProxyServer = My.Settings.ProxyAddress &":" &My.Settings.ProxyPort
                agent.InternetProxyLogin = My.Settings.ProxyUsername
                agent.InternetProxyPassword = My.Settings.ProxyPassword
            End If
            agent.OutputVerboseLevel = 1
            ' Delete any previous agent log before assigning the output file.
            If File.Exists(mPath &"\createmerge.txt") Then
                Kill(mPath &"\createmerge.txt")
            End If
            agent.Output = mPath &"\createmerge.txt"
            agent.Synchronize()
        Else
            Throw New ApplicationException(String.Format( _
                "A subscription to '{0}' does not exist on {1}", _
                publicationName, subscriberName))
        End If
    Catch ex As Exception
        ThrowError(ex)
    Finally
        ' Runs on success and on failure alike.
        conn.Disconnect()
    End Try
End Sub

 Use OpenOffice.org        Spread Firefox Affiliate Button

For any questions or enquiries, I can be reached at my email.
I look forward to hearing from you

Copyright © 2010 brentdodson.com