Update list of crawlers
[infodrom.org/www.zeitungsliste.de] / lib / core.inc
1 <?php
2
// Open a persistent PostgreSQL connection described by the global $cfg.
//
// Reads $cfg['dbname'] plus the optional $cfg['dbhost'], $cfg['dbport'],
// $cfg['dbuser'] and $cfg['dbpass'] entries, then switches the session
// to ISO date output.
//
// Returns the connection handle, or false if the connection failed.
function db_connect()
{
  global $cfg;

  // Configuration key => libpq connection-string keyword.
  $keywords = array('dbhost' => 'host',
                    'dbport' => 'port',
                    'dbuser' => 'user',
                    'dbpass' => 'password');

  // Every value is single-quoted with ' and \ backslash-escaped, so
  // values containing spaces or quotes (typically the password) do not
  // break the connection string.
  $dsn = "dbname='" . addcslashes((string) $cfg['dbname'], "'\\") . "'";
  foreach ($keywords as $key => $keyword) {
    if (isset($cfg[$key]))
      $dsn .= " " . $keyword . "='" . addcslashes((string) $cfg[$key], "'\\") . "'";
  }

  $dbh = pg_pconnect ($dsn);
  if ($dbh === false) {
    carp("Unable to connect to SQL server");
    // Do not issue queries on a connection that was never established.
    return false;
  }

  pg_query ($dbh, "SET DateStyle='ISO'");

  return $dbh;
}
19
// Run an SQL statement on the connection stored in $cfg['dbh'].
//
// On failure the statement and the PostgreSQL error message are written
// to the error log.
//
// Returns the result handle, or false if the statement failed.
function db_query($query)
{
  global $cfg;

  // pg_query() is the current name of the legacy pg_exec() alias.
  $sth = pg_query ($cfg['dbh'], $query);

  if ($sth === false) {
    error_log ($query);
    error_log (pg_last_error($cfg['dbh']));
  }

  return $sth;
}
35
// Fetch the current value of the sequence named "<table>_<column>_seq"
// via currval().
//
// Returns the first column of the first result row, or false when the
// query fails or yields no rows (the latter is also logged).
function db_last_id($table, $column)
{
  $sequence_sql = sprintf ("SELECT currval('%s_%s_seq')", $table, $column);
  $result = db_query ($sequence_sql);

  if (!$result)
    return false;

  if (pg_num_rows($result) != 0) {
    $first = pg_fetch_row($result, 0);
    return $first[0];
  }

  error_log ($sequence_sql . " resulted in an empty set");
  return false;
}
54
// Escape $text for HTML output while preserving a small whitelist of
// markup: <p></p>, <b></b>, <em></em>, <br> and <a href></a>.
//
// Works in three steps:
//   1. simple <a href="...">text</a> links become [[href][text]] markers,
//   2. everything is escaped with htmlspecialchars(),
//   3. the whitelisted tags and the link markers are turned back into HTML.
//
// A link marker is only restored when its href is scheme-less (no ':'
// before the first '/', '?' or '#') or uses http, https, ftp or mailto.
// Anything else, e.g. javascript: or data: URLs, stays as escaped text
// so it cannot execute in the visitor's browser.
//
// Returns the sanitised string.
function string_sanitise($text)
{
  $ntext = preg_replace (array('/<a\s+href=["\']?([^>"\']*)["\']?>([^<]*)<\/a>/i'),
                         array('[[$1][$2]]'),
                         $text);

  $ntext = htmlspecialchars($ntext, ENT_QUOTES);

  // href must start with an allowed scheme, or must not contain a colon
  // in its leading segment (negative lookahead).
  $link_marker = '/\[\[((?:(?:https?|ftp|mailto):|(?![^\]\/?#]*:))[^\]]+)\]\[([^\]]+)\]\]/i';

  $ntext = preg_replace(array('/&lt;(\/?(p|br|b|em))&gt;/i',
                              $link_marker),
                        array('<$1>','<a href="$1">$2</a>'),
                        $ntext);

  return $ntext;
}
71
// Work out where the current request sits below the site root.
//
// The site's base path is whatever follows the server name inside
// $cfg['home']. If the request URI (without its query string) starts
// with that base path, the remainder is stored in $cfg['path'], its
// directory part (when it contains a '/') in $cfg['dir'], and a string
// with one "../" per '/' in $cfg['path'] is returned.
//
// Returns nothing (null) when the request URI does not start with the
// base path; $cfg is left untouched in that case.
function basepath()
{
  global $cfg;

  $server = $_SERVER['SERVER_NAME'];
  $request = $_SERVER['REQUEST_URI'];

  // Locate the server name case-insensitively inside the home URL.
  $host_at = strpos(strtolower($cfg['home']), strtolower($server));
  $prefix = substr($cfg['home'], $host_at + strlen($server));

  // Drop the query string, if any.
  $qmark = strpos($request, "?");
  $uri = ($qmark === false) ? $request : substr($request, 0, $qmark);

  if (strpos($uri, $prefix) !== 0)
    return;

  $cfg['path'] = substr($uri, strlen($prefix));

  $slash = strrpos($cfg['path'], '/');
  if ($slash !== false)
    $cfg['dir'] = substr($cfg['path'], 0, $slash);

  return str_repeat("../", substr_count($cfg['path'], "/"));
}
94
// Tell whether the current session belongs to a logged-in user, i.e.
// whether $_SESSION['uid'] is set.
function logged_in()
{
  $has_uid = isset($_SESSION['uid']);

  return $has_uid;
}
99
// Tell whether JavaScript may be used for the current visitor.
//
// Visitors who are not logged in always get true. Logged-in users get
// true only when $_SESSION['javascript'] holds a truthy value; a missing
// entry counts as false.
function javascript_ok()
{
  if (!logged_in())
    return true;

  // !empty() matches the former loose "== true" comparison but does not
  // raise a notice when the session has no 'javascript' entry.
  return !empty($_SESSION['javascript']);
}
110
// Tell whether the current request comes from a known crawler.
//
// The User-Agent header is searched (case-sensitively) for each of the
// substrings in $known_spiders. For sessions with a 'uid' the verdict is
// cached in $_SESSION['robot'] and reused on later calls.
//
// A request without a User-Agent header is treated as "not a spider".
//
// Returns true for a recognised crawler, false otherwise (or the cached
// $_SESSION['robot'] value).
function is_spider()
{
  if (isset($_SESSION['uid']) && isset($_SESSION['robot']))
    return $_SESSION['robot'];

  $known_spiders = array('Yahoo! Slurp',
                         'Googlebot',
                         'Mediapartners-Google',
                         'VoilaBot',
                         'Gigabot/3.0',
                         'Speedy Spider',
                         'LinkWalker/2.0',
                         'proximic',
                         'Yeti/1.0',
                         'Eurobot/1.0',
                         'MnoGoSearch/',
                         'ia_archiver',
                         'Seekbot/1.0',
                         'MyEngines-Bot',
                         'larbin_',
                         'findlinks/1',
                         'holmes/3.12',
                         'NoteworthyBot/0.1',
                         'Eurosoft-Bot',
                         'Eurobot/1',
                         'Yandex',
                         'msnbot',
                         'Twiceler-0.9',
                         'ScoutJet',
                         'penthesilea/0.3',
                         'VisBot/2.0',
                         'Netluchs/Nutch-1.0',
                         'Yanga WorldSearch Bot',
                         'Semager',
                         'DotBot',
                         'Baiduspider',
                         'MJ12bot/',
                         'Yahoo-MMCrawler',
                         'Charlotte/1.1',
                         'Gaisbot/3.0',
                         'psbot/0.1',
                         'Ask Jeeves/Teoma',
                         'SapphireWebCrawler/Nutch',
                         'kalooga/KaloogaBot',
                         'SimilarPages/Nutch-1.0-dev',
                         'Cogentbot/1.',
                         'CatchBot/1.0',
                         'GingerCrawler/1.0',
                         'yacybot',
                         'www.yacy.net',
                         'Nutch-1.0',
                         'Tagoobot/3.0',
                         'SapphireWebCrawler/1.0',
                         'BotOnParade',
                         'SurveyBot/2.3',
                         'XmarksFetch/1.0',
                         'spbot/',
                         'TinEye/1.1',
                         'TurnitinBot/2.1',
                         'CligooRobot/1.0',
                         'libwww-perl',
                         'Wget',
                         'Python-urllib',
                         'CamontSpider/1.0',
                         'TwengaBot-Discover',
                         'Hailoobot/1.2',
                         'bingbot/2.0',
                         'Exabot/3.0',
                         'spbot/2.1',
                         'Search17Bot/1',
                         'Linguee Bot',
                         'findlinks/2',
                         'SiteBot/0.1',
                         'Purebot/1.1',
                         'Ezooms/1.0',
                         'discobot/1.1',
                         'AhrefsBot/1.0',
                         'suggybot v0.01a',
                         'DomainCrawler/2.0',
                         'Plukkie/1',
                         '/Nutch-',
                         'aiHitBot/1.0',
                         'AhrefsBot/',
                         'Crawler',
                         'Acoon',
                         'aiHitBot',
                         'SeznamBot',
                         'CareerBot/1',
                         'SiteExplorer/1',
                         'discoverybot/2.0',
                         'SemrushBot/',
                         'archive.org_bot',
                         'waybackarchive.org/1.0',
                         'spbot/4.0',
                         'Mail.RU_Bot/2.0',
                         'linkdexbot/2.0',
                         'webmeup-crawler.com',
                         'meanpathbot/',
                         'SearchmetricsBot',
                         'publiclibraryarchive.org/',
                         'memoryBot/1',
                         'Lipperhey SEO Service',
                         'Sitedomain-Bot',
                         'seoscanners.net/',
                         'Qwantify/',
                         'WBSearchBot/',
                         'Seobility',
                         'MetaJobBot',
                         'BellPagesCA/1.0',
                         'SafeDNSBot',
                         'Uptimebot/1.0',
                         'DomainStatsBot/1.0',
                         'MetaJobBot',
                         'SEOkicks-Robot',
                         'Cliqzbot',
                         'bingbot/3',
                         'SEOkicks-Robot',
                         'MojeekBot/',
                         'oBot/',
                         'CCBot/',
                         'TurnitinBot',
                         'ips-agent',
                         'ExtLinksBot/',
                         'Barkrowler/',
                         'adscanner/',
                         'Cityreview Robot');

  // Clients are free to omit the User-Agent header; fall back to an
  // empty string instead of reading an undefined index.
  $agent = isset($_SERVER['HTTP_USER_AGENT'])
    ? (string) $_SERVER['HTTP_USER_AGENT']
    : '';

  $is_robot = false;
  if ($agent !== '') {
    foreach ($known_spiders as $spider_id) {
      if (strpos($agent, $spider_id) !== false) {
        $is_robot = true;
        break;
      }
    }
  }

  // Only sessions with a uid remember the verdict.
  if (isset($_SESSION['uid']))
    $_SESSION['robot'] = $is_robot;

  return $is_robot;
}
251
// Tell whether the current session belongs to the administrator, i.e.
// whether $_SESSION['nickname'] is exactly the string 'Joey'.
//
// Returns false when there is no session or no nickname in it.
function is_admin()
{
  // isset() covers a missing session, a null session and a missing or
  // null nickname in one step, without raising notices.
  if (!isset($_SESSION['nickname']))
    return false;

  return $_SESSION['nickname'] === 'Joey';
}
262
// Build an HTML list of the logged-in user's bookmarks.
//
// Selects up to 20 rows (zeitung, name) for $_SESSION['uid'], ordered by
// priority and name, and renders each as a link to
// "<basepath>zeitung/<id>.html" using $cfg['basepath'].
//
// Returns the HTML fragment, or false when no uid is in the session,
// the query fails, or there are no rows.
//
// NOTE(review): the name column is inserted into the markup as-is;
// confirm that it is stored HTML-safe.
function format_info_bookmarks()
{
  global $cfg;

  if (!isset($_SESSION['uid']))
    return false;

  $sql = sprintf("SELECT zeitung,name FROM bookmarks " .
                 "JOIN zeitungen ON zeitungen.id = zeitung " .
                 "WHERE uid = %d " .
                 "ORDER BY priority,name LIMIT 20",
                 $_SESSION['uid']);

  $result = db_query($sql);
  if ($result === false)
    return false;

  $count = pg_num_rows ($result);
  if ($count == 0)
    return false;

  $items = array();
  for ($i = 0; $i < $count; $i++) {
    $bookmark = pg_fetch_array ($result, $i);
    $items[] = sprintf('<li><a href="%szeitung/%d.html">%s</a></li>',
                       $cfg['basepath'], $bookmark['zeitung'], $bookmark['name']);
  }

  return '<p><ul>' . implode('', $items) . '</ul></p>';
}