MySQL stats script

2010-07-15

At my old host I created a MySQL function that would handle saving the visitor stats for my website for me. Basically you had to hand it the information of the page hit (IP, url, etc.) and it will sort it all out. I'm not using it anymore because I didn't get enough rights to make functions on my new (current) host. So I'm doing this in PHP again now :p

Anyways, this is a MySQL function. If the tables don't exist it will create them for you. To be honest, I'm not sure whether this was the old function or the "rewritten-untested-function-after-which-i-discovered-i-couldnt-create-functions" version. Especially the uninitialized @prefix variable worries me. So test it, if you'd ever want to use this :) I'm really just showing it for the heck of it.

This was used on MySQL 5 something.

Code: (MySQL)
DROP FUNCTION IF EXISTS `add_stats_hit`;
DELIMITER $$

CREATE FUNCTION add_stats_hit(_ip INT(10) unsigned, _host TEXT, _url TEXT, _method TEXT, _ref TEXT, _query TEXT, _agent TEXT)
RETURNS INT # ignored: -1, banned: 1, otherwise: 0
MODIFIES SQL DATA # indicates the function contains queries that write to the database
NOT DETERMINISTIC # same input does not guarantee same output
BEGIN
# init vars
SET @isignored = 0;
SET @isbanned = 0;
SET @ishit = 0;
SET @isnewuser = 0;
SET @newtoday = 0;
SET @newtopagetoday = 0;
SET @isnewpage = 0;
SET @isnewpagetoday = 0;

# create tables if they dont exist...
SET @name = CONCAT(@prefix, 'ignored');
IF NOT EXISTS (SELECT 1 FROM Information_schema.tables WHERE table_name = @name AND table_schema = "my_stats") THEN
# use prepared statements to get a dynamic table name (we want to prefix them)
SET @stmt = CONCAT("CREATE TABLE IF NOT EXISTS ",@name," (`iid` int(11) NOT NULL auto_increment,`data` TEXT NULL default NULL,`desc` TEXT NULL default NULL,`since` timestamp NULL default NULL,`hits` int(11) NOT NULL,PRIMARY KEY (`iid`)) ENGINE=MyISAM DEFAULT CHARSET=utf8");
PREPARE stmt FROM @stmt;
EXECUTE stmt;
SET @name = CONCAT(@prefix, 'banned');
SET @stmt = CONCAT("CREATE TABLE IF NOT EXISTS ",@name," (`bid` int(11) NOT NULL auto_increment,`data` TEXT character set utf8 collate utf8_general_ci NOT NULL,`desc` TEXT NULL default NULL,`offense` TEXT NULL,`since` timestamp NULL default NULL,`hits` int(11) NOT NULL,PRIMARY KEY (`data`(255)),UNIQUE KEY (`bid`)) ENGINE=MyISAM DEFAULT CHARSET=utf8");
PREPARE stmt FROM @stmt;
EXECUTE stmt;
SET @name = CONCAT(@prefix, 'agents');
SET @stmt = CONCAT("CREATE TABLE IF NOT EXISTS ",@name," (`aid` int(11) NOT NULL auto_increment,`agent` text character set utf8 collate utf8_bin NOT NULL,`hits` int(11) NOT NULL,`first` timestamp NULL default NULL,`last` timestamp NULL default NULL,PRIMARY KEY (`agent`(300)),UNIQUE KEY `aid` (`aid`)) ENGINE=MyISAM DEFAULT CHARSET=utf8");
PREPARE stmt FROM @stmt;
EXECUTE stmt;
SET @name = CONCAT(@prefix, 'ips');
SET @stmt = CONCAT("CREATE TABLE IF NOT EXISTS ",@name," (`ip` int(10) unsigned NOT NULL,`host` text NOT NULL,`first` timestamp NULL default NULL,`last` timestamp NULL default NULL,`hits` int(11) NOT NULL,`status` enum('ok','ignored','banned') NOT NULL default 'ok',`agent` int(11) NOT NULL,PRIMARY KEY (`ip`)) ENGINE=MyISAM DEFAULT CHARSET=utf8");
PREPARE stmt FROM @stmt;
EXECUTE stmt;
SET @name = CONCAT(@prefix, 'pages');
SET @stmt = CONCAT("CREATE TABLE IF NOT EXISTS ",@name," (`pid` int(11) NOT NULL auto_increment,`url` text character set utf8 collate utf8_bin NOT NULL,`method` enum('get','post','other','ref') NOT NULL default 'ref',`first` timestamp NULL default NULL,`last` timestamp NULL default NULL,`hits` int(11) NOT NULL,`visitors` int(11) NOT NULL,`seen` int(11) NOT NULL comment 'number of times as ref',`query` TEXT NULL default NULL,PRIMARY KEY (`url`(260)),UNIQUE KEY `pid` (`pid`)) ENGINE=MyISAM DEFAULT CHARSET=utf8");
PREPARE stmt FROM @stmt;
EXECUTE stmt;
SET @name = CONCAT(@prefix, 'queries');
SET @stmt = CONCAT("CREATE TABLE IF NOT EXISTS ",@name," (`qid` int(11) NOT NULL auto_increment,`query` text character set utf8 collate utf8_general_ci NOT NULL,`used` int(11) NOT NULL,PRIMARY KEY (`query`(100)),UNIQUE KEY `qid` (`qid`)) ENGINE=MyISAM DEFAULT CHARSET=utf8");
PREPARE stmt FROM @stmt;
EXECUTE stmt;
SET @name = CONCAT(@prefix, 'pageday');
SET @stmt = CONCAT("CREATE TABLE IF NOT EXISTS ",@name," (`pdid` int(11) NOT NULL auto_increment,`page` int(11) NOT NULL,`day` date default NULL,`visitors` int(11) NOT NULL,`hits` int(11) NOT NULL,PRIMARY KEY (`page`,`day`),UNIQUE KEY `pdid` (`pdid`)) ENGINE=MyISAM DEFAULT CHARSET=utf8");
PREPARE stmt FROM @stmt;
EXECUTE stmt;
SET @name = CONCAT(@prefix, 'userpageday');
SET @stmt = CONCAT("CREATE TABLE IF NOT EXISTS ",@name," (`updid` int(11) NOT NULL auto_increment,`pageday` int(11) NOT NULL,`ip` int(10) unsigned NOT NULL,`hits` int(11) NOT NULL,PRIMARY KEY (`ip`, `pageday`),UNIQUE KEY `updid` (`updid`)) ENGINE=MyISAM DEFAULT CHARSET=utf8");
PREPARE stmt FROM @stmt;
EXECUTE stmt;
SET @name = CONCAT(@prefix, 'userpage');
SET @stmt = CONCAT("CREATE TABLE IF NOT EXISTS ",@name," (`upid` int(11) NOT NULL auto_increment,`ip` int(10) unsigned NOT NULL,`page` int(11) NOT NULL,`hits` int(11) NOT NULL,PRIMARY KEY (`ip`,`page`),UNIQUE KEY `upid` (`upid`)) ENGINE=MyISAM DEFAULT CHARSET=utf8");
PREPARE stmt FROM @stmt;
EXECUTE stmt;
SET @name = CONCAT(@prefix, 'totals');
SET @stmt = CONCAT("CREATE TABLE IF NOT EXISTS ",@name," (`day` DATE NOT NULL,`visitors` int(11) NOT NULL,`new` int(11) NOT NULL,`hits` int(11) NOT NULL,`ignored` int(11) NOT NULL COMMENT 'hits',`banned` int(11) NOT NULL COMMENT 'hits',`pages` int(11) NOT NULL COMMENT 'new today',`newpages` int(11) NOT NULL COMMENT 'new at all',PRIMARY KEY (`day`)) ENGINE=MyISAM DEFAULT CHARSET=utf8");
PREPARE stmt FROM @stmt;
EXECUTE stmt;

END IF;

# first check whether the ip, host or agent is (partially) ignored
SELECT `iid` INTO @iid FROM `stats_ignored` WHERE INET_NTOA(_ip) LIKE `data` OR _host LIKE `data` OR _agent LIKE `data` LIMIT 1;
IF @iid > 0 THEN
SET @isignored = 1;
UPDATE `stats_ignored` SET `hits` = `hits` + 1 WHERE `iid` = @iid LIMIT 1;
ELSE
# check whether this user has been banned, by (partial) ip, hostname or agent.
SELECT `bid` INTO @bid FROM `stats_banned` WHERE INET_NTOA(_ip) LIKE `data` OR _host LIKE `data` OR _agent LIKE `data` LIMIT 1;
IF @bid > 0 THEN
SET @isbanned = 1;
UPDATE `stats_banned` SET `hits` = `hits` + 1 WHERE `bid` = @bid LIMIT 1;
# will return a @bid to indicate that this visitor has been banned, one way or the other
# dont return here, update totals table
ELSE
SET @ishit = 1;

# add the agent to the table
INSERT INTO `stats_agents` SET `agent` = _agent, `first` = NOW(), `last` = NOW(), `hits` = 1
ON DUPLICATE KEY UPDATE `hits` = `hits` + 1, `last` = NOW();
SELECT `aid` INTO @agent FROM `stats_agents` WHERE `agent` = _agent LIMIT 1;

# check if this is a new user
SELECT `last` INTO @lastvisit FROM `stats_ips` WHERE `ip` = _ip LIMIT 1;
SET @isnewuser = @lastvisit IS NULL;

SET @newtoday = @isnewuser OR UNIX_TIMESTAMP(@lastvisit) < UNIX_TIMESTAMP(CURDATE());

IF @isnewuser THEN
# put this user into the database
INSERT INTO `stats_ips` SET `ip` = _ip, `host` = _host, `first` = NOW(), `last` = NOW(), `hits` = 1, `status` = 'ok', `agent` = @agent;
ELSE
# update stuff
UPDATE `stats_ips` SET `hits` = `hits` + 1, `last` = NOW() WHERE `ip` = _ip LIMIT 1;
END IF;

# next, we need to put the page in the database, but first we check to see whether
# the page was accessed by this visitor at all, at any time. but for that we need
# the pid for which we need to add the page to the database first, so we have a problem
# well, or not. we fix this by first checking whether the page actually exists at all.
# this will be done by the insert on key exists anyways...
SELECT `pid` INTO @pid FROM `stats_pages` WHERE `url` = _url LIMIT 1;
SET @isnewpage = @pid IS NULL;
IF NOT @isnewpage THEN # the page exists...
# ok the page exists, now check if the user ever visited it... (unless its a new user)
IF NOT @isnewuser THEN
SELECT `page` INTO @newtopage FROM `stats_userpage` WHERE `ip` = _ip AND `page` = @pid LIMIT 1;
END IF;
SET @newtopage = @isnewuser OR (@newtopage IS NULL); # a boolean maps to 1 or 0, we simply add this to visitors
UPDATE `stats_pages` SET `hits` = `hits` + 1, `visitors` = `visitors` + @newtopage, `last` = NOW(), `islocal` = 'yes' WHERE `pid` = @pid LIMIT 1;
# and update the stats_userpage table
IF @newtopage THEN # user visited this page before, update record
INSERT INTO `stats_userpage` SET `ip` = _ip, `page` = @pid, `hits` = 1;
ELSE # user never visited this page, insert new record
UPDATE `stats_userpage` SET `hits` = `hits` + 1 WHERE `page` = @pid LIMIT 1;
END IF;
ELSE # the page does not exist and we need to add it
INSERT INTO `stats_pages` SET `url` = _url, `method` = _method, `first` = NOW(), `last` = NOW(), `hits` = 1 , `visitors` = 1, `seen` = 0, `query` = NULL, `islocal` = 'yes';
# get the new id
SET @pid = LAST_INSERT_ID();
# and reflect the user visiting it
INSERT INTO `stats_userpage` SET `ip` = _ip, `page` = @pid, `hits` = 1;
END IF;
# pid is >0 at this point

# now for the daily stats, we have two tables keeping tabs on this, the page,day combo and
# the user,page,day trio. to know whether the page,day combo should get an extra visitor
# we must first check whether it exists, like before. however, we already have the @isnewpage
# variable, so we might be able to skip this once.
IF NOT @isnewpage THEN # old page, check if it has been visited today
SELECT `pdid` INTO @pdid FROM `stats_pageday` WHERE `page` = @pid AND `day` = CURDATE() LIMIT 1;
SET @isnewpagetoday = @pdid IS NULL;
IF NOT @isnewpagetoday THEN # the page has been visited today, check if it has been by _ip :)
IF NOT @isnewuser THEN
SELECT `updid` INTO @updid FROM `stats_userpageday` WHERE `ip` = _ip AND `pageday` = @pdid LIMIT 1;
END IF;
SET @newtopagetoday = @isnewuser OR (@updid IS NULL); # booleans are mapped to 0 and 1, so we add this to the visitor update
# now update the record
UPDATE `stats_pageday` SET `hits` = `hits` + 1, `visitors` = `visitors` + @newtopagetoday WHERE `pdid` = @pdid LIMIT 1;
# and insert or update the upd record
IF @newtopagetoday THEN
INSERT INTO `stats_userpageday` SET `ip` = _ip, `pageday` = @pdid, `hits` = 1;
ELSE
UPDATE `stats_userpageday` SET `hits` = `hits` + 1 WHERE `updid` = @updid LIMIT 1;
END IF;
ELSE # page has not been visited today, add it...
SET @newtopagetoday = 1;
INSERT INTO `stats_pageday` SET `page` = @pid, `day` = CURDATE(), `hits` = 1, `visitors` = 1;
SET @pdid = LAST_INSERT_ID();
# and the fact that the user visited it
INSERT INTO `stats_userpageday` SET `ip` = _ip, `pageday` = @pdid, `hits` = 1;
END IF;
ELSE # was a new page, no point in bothering to check for new visitor
SET @isnewpagetoday = 1;
INSERT INTO `stats_pageday` SET `page` = @pid, `day` = CURDATE(), `hits` = 1, `visitors` = 1;
SET @pdid = LAST_INSERT_ID();
# and the fact that the user visited it
INSERT INTO `stats_userpageday` SET `ip` = _ip, `pageday` = @pdid, `hits` = 1;
END IF;

# add the query string
IF _query IS NOT NULL THEN
INSERT INTO `stats_queries` SET `query` = _query, `used` = 1
ON DUPLICATE KEY UPDATE `used` = `used` + 1;
SELECT `qid` INTO @qid FROM `stats_queries` WHERE `query` = _query LIMIT 1;
END IF;

# now add it to the referer
IF _ref IS NOT NULL THEN
INSERT INTO `stats_pages` SET `url` = _ref, `first` = NOW(), `last` = NOW(), `seen` = 1, `hits` = 0, `visitors` = 0, `query` = @qid, `islocal` = 'no'
ON DUPLICATE KEY UPDATE `seen` = `seen` + 1, `last` = NOW(), `query` = @qid;
#SELECT `pid` INTO @ref FROM `stats_pages` WHERE `url` = _ref LIMIT 1;
END IF;
END IF;
END IF;

# pure redundantly, but saves computation time, and looks nice :p
INSERT INTO `stats_totals` SET `day` = CURDATE(), `visitors` = 1, `new` = @isnewuser, `hits` = @ishit, `ignored` = @isignored, `banned` = @isbanned, `pages` = 1, `newpages` = @isnewpage
ON DUPLICATE KEY UPDATE `visitors` = `visitors` + @newtoday, `new` = `new` + @isnewuser, `hits` = `hits` + @ishit, `ignored` = `ignored` + @isignored, `banned` = `banned` + @isbanned, `pages` = `pages` + @isnewpagetoday, `newpages` = `newpages` + @isnewpage;

IF @isignored THEN RETURN -1; END IF;
IF @isbanned THEN RETURN 1; END IF;
RETURN 0;
END$$

And you use it by executing a query with the function. It will return -1 if the user was ignored, -1 if ignored and 0 otherwise. You have to popuplate the ignore and ban table yourself (they take LIKE masks for hostnames (%.google.com), IP's (192.168.6.%) and user agents (%bot%).

Code: (MySQL)
SELECT add_stats_hit(4564786, "visitor.qfox.nl", "http://mywebsite.com/url.html", "GET", "http://google.com/?q=items", "items", "Chrome");

Hope it helps you. Maybe it if you're stuck on creating functions or procedures in MySQL or something :)