{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# API"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/Users/valery/Documents/_code/arche/src\n"
     ]
    }
   ],
   "source": [
    "%cd ../../../src"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Getting data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<script type=\"text/javascript\">window.PlotlyConfig = {MathJaxConfig: 'local'};</script><script type=\"text/javascript\">if (window.MathJax) {MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}</script><script>requirejs.config({paths: { 'plotly': ['https://cdn.plot.ly/plotly-latest.min']},});if(!window._Plotly) {require(['plotly'],function(plotly) {window._Plotly=plotly;});}</script>"
      ],
      "text/vnd.plotly.v1+html": [
       "<script type=\"text/javascript\">window.PlotlyConfig = {MathJaxConfig: 'local'};</script><script type=\"text/javascript\">if (window.MathJax) {MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}</script><script>requirejs.config({paths: { 'plotly': ['https://cdn.plot.ly/plotly-latest.min']},});if(!window._Plotly) {require(['plotly'],function(plotly) {window._Plotly=plotly;});}</script>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from arche.readers.items import JobItems"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "job_items = JobItems(key=\"381798/1/1\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0573ca4f56214e9985c8bd89639ce594",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, description='Fetching 0:1000 from 381798/1/1', max=1000, style=ProgressSty…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>_key</th>\n",
       "      <th>_type</th>\n",
       "      <th>category</th>\n",
       "      <th>description</th>\n",
       "      <th>price</th>\n",
       "      <th>title</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>https://app.scrapinghub.com/p/381798/1/1/item/0</td>\n",
       "      <td>dict</td>\n",
       "      <td>Travel</td>\n",
       "      <td>“Wherever you go, whatever you do, just . . . ...</td>\n",
       "      <td>£45.17</td>\n",
       "      <td>It's Only the Himalayas</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>https://app.scrapinghub.com/p/381798/1/1/item/1</td>\n",
       "      <td>dict</td>\n",
       "      <td>Politics</td>\n",
       "      <td>Libertarianism isn't about winning elections; ...</td>\n",
       "      <td>£51.33</td>\n",
       "      <td>Libertarianism for Beginners</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>https://app.scrapinghub.com/p/381798/1/1/item/2</td>\n",
       "      <td>dict</td>\n",
       "      <td>Science Fiction</td>\n",
       "      <td>Andrew Barger, award-winning author and engine...</td>\n",
       "      <td>£37.59</td>\n",
       "      <td>Mesaerion: The Best Science Fiction Stories 18...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>https://app.scrapinghub.com/p/381798/1/1/item/3</td>\n",
       "      <td>dict</td>\n",
       "      <td>Poetry</td>\n",
       "      <td>Part fact, part fiction, Tyehimba Jess's much ...</td>\n",
       "      <td>£23.88</td>\n",
       "      <td>Olio</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>https://app.scrapinghub.com/p/381798/1/1/item/4</td>\n",
       "      <td>dict</td>\n",
       "      <td>Music</td>\n",
       "      <td>This is the never-before-told story of the mus...</td>\n",
       "      <td>£57.25</td>\n",
       "      <td>Our Band Could Be Your Life: Scenes from the A...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                              _key _type         category  \\\n",
       "0  https://app.scrapinghub.com/p/381798/1/1/item/0  dict           Travel   \n",
       "1  https://app.scrapinghub.com/p/381798/1/1/item/1  dict         Politics   \n",
       "2  https://app.scrapinghub.com/p/381798/1/1/item/2  dict  Science Fiction   \n",
       "3  https://app.scrapinghub.com/p/381798/1/1/item/3  dict           Poetry   \n",
       "4  https://app.scrapinghub.com/p/381798/1/1/item/4  dict            Music   \n",
       "\n",
       "                                         description   price  \\\n",
       "0  “Wherever you go, whatever you do, just . . . ...  £45.17   \n",
       "1  Libertarianism isn't about winning elections; ...  £51.33   \n",
       "2  Andrew Barger, award-winning author and engine...  £37.59   \n",
       "3  Part fact, part fiction, Tyehimba Jess's much ...  £23.88   \n",
       "4  This is the never-before-told story of the mus...  £57.25   \n",
       "\n",
       "                                               title  \n",
       "0                            It's Only the Himalayas  \n",
       "1                       Libertarianism for Beginners  \n",
       "2  Mesaerion: The Best Science Fiction Stories 18...  \n",
       "3                                               Olio  \n",
       "4  Our Band Could Be Your Life: Scenes from the A...  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "job_items.df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Running rules"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "from arche.rules.duplicates import find_by"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Items Uniqueness By Columns:\n",
      "\u001b[31m\u001b[0m\t2 duplicate(s) with same title, category\u001b[0m\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "2 items affected - same 'The Star-Touched Queen' title 'Fantasy' category: <a href='https://app.scrapinghub.com/p/381798/1/1/item/221'>221</a> <a href='https://app.scrapinghub.com/p/381798/1/1/item/341'>341</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<br>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "find_by(job_items.df, [\"title\", \"category\"]).show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
