Comparison with SQL¶
Since many potential pandas users have some familiarity with SQL, this page is meant to provide some examples of how various SQL operations would be performed using pandas.
If you’re new to pandas, you might want to first read through 10 Minutes to pandas to familiarize yourself with the library.
As is customary, we import pandas and NumPy as follows:
In [1]: import pandas as pd
In [2]: import numpy as np
Most of the examples will utilize the tips dataset found within pandas tests. We’ll read
the data into a DataFrame called tips and assume we have a database table of the same name and
structure.
In [3]: url = 'https://raw.github.com/pandas-dev/pandas/master/pandas/tests/data/tips.csv'
In [4]: tips = pd.read_csv(url)
---------------------------------------------------------------------------
ConnectionRefusedError Traceback (most recent call last)
/usr/lib/python3.7/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
1323 h.request(req.get_method(), req.selector, req.data, headers,
-> 1324 encode_chunked=req.has_header('Transfer-encoding'))
1325 except OSError as err: # timeout error
/usr/lib/python3.7/http/client.py in request(self, method, url, body, headers, encode_chunked)
1263 """Send a complete request to the server."""
-> 1264 self._send_request(method, url, body, headers, encode_chunked)
1265
/usr/lib/python3.7/http/client.py in _send_request(self, method, url, body, headers, encode_chunked)
1309 body = _encode(body, 'body')
-> 1310 self.endheaders(body, encode_chunked=encode_chunked)
1311
/usr/lib/python3.7/http/client.py in endheaders(self, message_body, encode_chunked)
1258 raise CannotSendHeader()
-> 1259 self._send_output(message_body, encode_chunked=encode_chunked)
1260
/usr/lib/python3.7/http/client.py in _send_output(self, message_body, encode_chunked)
1033 del self._buffer[:]
-> 1034 self.send(msg)
1035
/usr/lib/python3.7/http/client.py in send(self, data)
973 if self.auto_open:
--> 974 self.connect()
975 else:
/usr/lib/python3.7/http/client.py in connect(self)
1418
-> 1419 super().connect()
1420
/usr/lib/python3.7/http/client.py in connect(self)
945 self.sock = self._create_connection(
--> 946 (self.host,self.port), self.timeout, self.source_address)
947 self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
/usr/lib/python3.7/socket.py in create_connection(address, timeout, source_address)
726 if err is not None:
--> 727 raise err
728 else:
/usr/lib/python3.7/socket.py in create_connection(address, timeout, source_address)
715 sock.bind(source_address)
--> 716 sock.connect(sa)
717 # Break explicitly a reference cycle
ConnectionRefusedError: [Errno 111] Connection refused
During handling of the above exception, another exception occurred:
URLError Traceback (most recent call last)
<ipython-input-4-8ab2297b7141> in <module>()
----> 1 tips = pd.read_csv(url)
/builds/AstraOS/buildsystem/tbs_build/pandas/pandas-0.23.3+dfsg/debian/python3-pandas/usr/lib/python3/dist-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, doublequote, delim_whitespace, low_memory, memory_map, float_precision)
676 skip_blank_lines=skip_blank_lines)
677
--> 678 return _read(filepath_or_buffer, kwds)
679
680 parser_f.__name__ = name
/builds/AstraOS/buildsystem/tbs_build/pandas/pandas-0.23.3+dfsg/debian/python3-pandas/usr/lib/python3/dist-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
422 compression = _infer_compression(filepath_or_buffer, compression)
423 filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer(
--> 424 filepath_or_buffer, encoding, compression)
425 kwds['compression'] = compression
426
/builds/AstraOS/buildsystem/tbs_build/pandas/pandas-0.23.3+dfsg/debian/python3-pandas/usr/lib/python3/dist-packages/pandas/io/common.py in get_filepath_or_buffer(filepath_or_buffer, encoding, compression, mode)
193
194 if _is_url(filepath_or_buffer):
--> 195 req = _urlopen(filepath_or_buffer)
196 content_encoding = req.headers.get('Content-Encoding', None)
197 if content_encoding == 'gzip':
/usr/lib/python3.7/urllib/request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
220 else:
221 opener = _opener
--> 222 return opener.open(url, data, timeout)
223
224 def install_opener(opener):
/usr/lib/python3.7/urllib/request.py in open(self, fullurl, data, timeout)
523 req = meth(req)
524
--> 525 response = self._open(req, data)
526
527 # post-process response
/usr/lib/python3.7/urllib/request.py in _open(self, req, data)
541 protocol = req.type
542 result = self._call_chain(self.handle_open, protocol, protocol +
--> 543 '_open', req)
544 if result:
545 return result
/usr/lib/python3.7/urllib/request.py in _call_chain(self, chain, kind, meth_name, *args)
501 for handler in handlers:
502 func = getattr(handler, meth_name)
--> 503 result = func(*args)
504 if result is not None:
505 return result
/usr/lib/python3.7/urllib/request.py in https_open(self, req)
1365 def https_open(self, req):
1366 return self.do_open(http.client.HTTPSConnection, req,
-> 1367 context=self._context, check_hostname=self._check_hostname)
1368
1369 https_request = AbstractHTTPHandler.do_request_
/usr/lib/python3.7/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
1324 encode_chunked=req.has_header('Transfer-encoding'))
1325 except OSError as err: # timeout error
-> 1326 raise URLError(err)
1327 r = h.getresponse()
1328 except:
URLError: <urlopen error [Errno 111] Connection refused>
In [5]: tips.head()