Skip to content

Commit

Permalink
Rebuilt the docs
Browse files Browse the repository at this point in the history
  • Loading branch information
pafluxa committed Aug 21, 2023
1 parent 60f0b9f commit 80da28d
Show file tree
Hide file tree
Showing 7 changed files with 70 additions and 14 deletions.
64 changes: 52 additions & 12 deletions _modules/infer.html
Original file line number Diff line number Diff line change
Expand Up @@ -295,18 +295,58 @@ <h1>Source code for infer</h1><div class="highlight"><pre>
<span class="k">return</span> <span class="n">dtype_guess</span>


<span class="k">def</span> <span class="nf">type_check_date</span><span class="p">(</span><span class="n">element</span><span class="p">:</span> <span class="nb">object</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">str</span><span class="p">:</span>
<div class="viewcode-block" id="type_check_date"><a class="viewcode-back" href="../infer.html#infer.type_check_date">[docs]</a><span class="k">def</span> <span class="nf">type_check_date</span><span class="p">(</span><span class="n">element</span><span class="p">:</span> <span class="nb">object</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">str</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Check if element corresponds to a date-like object.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="c1"># check if element represents a date (no hour/minute/seconds)</span>
<span class="n">is_date</span> <span class="o">=</span> <span class="kc">False</span>
<span class="c1"># check if element represents a datetime (has hour/minute/seconds)</span>
<span class="n">is_datetime</span> <span class="o">=</span> <span class="kc">False</span>
<span class="c1"># check if it makes sense to convert element to unix time-stamp by</span>
<span class="c1"># evaluating if, when converted, the element represents a number that</span>
<span class="c1"># is compatible with a Unix timestamp (number of seconds since 1970-01-01T00:00:00)</span>
<span class="c1"># note that we also check the number is not larger than the &quot;epochalypse time&quot;,</span>
<span class="c1"># which is when the unix timestamp becomes larger than 2^31 - 1 seconds (the</span>
<span class="c1"># signed 32-bit limit). We do this because timestamps outside this range are</span>
<span class="c1"># likely to be unreliable and are hence better treated as ordinary numbers.</span>
<span class="n">min_dt</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">to_datetime</span><span class="p">(</span><span class="s1">&#39;1970-01-01 00:00:00&#39;</span><span class="p">,</span> <span class="n">utc</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="n">max_dt</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">to_datetime</span><span class="p">(</span><span class="s1">&#39;2038-01-19 03:14:08&#39;</span><span class="p">,</span> <span class="n">utc</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="n">valid_units</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;ns&#39;</span><span class="p">:</span> <span class="s1">&#39;unix&#39;</span><span class="p">,</span> <span class="s1">&#39;us&#39;</span><span class="p">:</span> <span class="s1">&#39;unix&#39;</span><span class="p">,</span> <span class="s1">&#39;ms&#39;</span><span class="p">:</span> <span class="s1">&#39;unix&#39;</span><span class="p">,</span> <span class="s1">&#39;s&#39;</span><span class="p">:</span> <span class="s1">&#39;unix&#39;</span><span class="p">,</span>
<span class="s1">&#39;D&#39;</span><span class="p">:</span> <span class="s1">&#39;julian&#39;</span><span class="p">}</span>
<span class="k">for</span> <span class="n">unit</span><span class="p">,</span> <span class="n">origin</span> <span class="ow">in</span> <span class="n">valid_units</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">as_dt</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">to_datetime</span><span class="p">(</span><span class="n">element</span><span class="p">,</span> <span class="n">unit</span><span class="o">=</span><span class="n">unit</span><span class="p">,</span> <span class="n">origin</span><span class="o">=</span><span class="n">origin</span><span class="p">,</span>
<span class="n">errors</span><span class="o">=</span><span class="s1">&#39;raise&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">min_dt</span> <span class="o">&lt;</span> <span class="n">as_dt</span> <span class="o">&lt;</span> <span class="n">max_dt</span><span class="p">:</span>
<span class="n">is_datetime</span> <span class="o">=</span> <span class="kc">True</span>
<span class="k">break</span>
<span class="k">except</span> <span class="ne">Exception</span><span class="p">:</span>
<span class="k">pass</span>
<span class="c1"># check if element represents a date-like object.</span>
<span class="c1"># here we don&#39;t check for a validity range like with unix-timestamps</span>
<span class="c1"># because dates as string usually represent something more general than</span>
<span class="c1"># just the number of seconds since an epoch.</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">dt</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">to_datetime</span><span class="p">(</span><span class="n">element</span><span class="p">)</span>

<span class="c1"># Not accurate 100% for a single datetime str, but should work in aggregate</span>
<span class="k">if</span> <span class="n">dt</span><span class="o">.</span><span class="n">hour</span> <span class="o">==</span> <span class="mi">0</span> <span class="ow">and</span> <span class="n">dt</span><span class="o">.</span><span class="n">minute</span> <span class="o">==</span> <span class="mi">0</span> <span class="ow">and</span> <span class="n">dt</span><span class="o">.</span><span class="n">second</span> <span class="o">==</span> <span class="mi">0</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">element</span><span class="p">))</span> <span class="o">&lt;=</span> <span class="mi">16</span><span class="p">:</span>
<span class="k">return</span> <span class="n">dtype</span><span class="o">.</span><span class="n">date</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">dtype</span><span class="o">.</span><span class="n">datetime</span>

<span class="k">except</span> <span class="ne">ValueError</span><span class="p">:</span>
<span class="k">return</span> <span class="kc">None</span>
<span class="n">as_dt</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">to_datetime</span><span class="p">(</span><span class="n">element</span><span class="p">,</span> <span class="n">errors</span><span class="o">=</span><span class="s1">&#39;raise&#39;</span><span class="p">)</span>
<span class="n">is_datetime</span> <span class="o">=</span> <span class="kc">True</span>
<span class="k">except</span> <span class="ne">Exception</span><span class="p">:</span>
<span class="k">pass</span>
<span class="c1"># finally, if element represents a datetime object, check if only</span>
<span class="c1"># date part is contained (no time information)</span>
<span class="k">if</span> <span class="n">is_datetime</span><span class="p">:</span>
<span class="c1"># round element day (drop hour/minute/second)</span>
<span class="n">dt_d</span> <span class="o">=</span> <span class="n">as_dt</span><span class="o">.</span><span class="n">to_period</span><span class="p">(</span><span class="s1">&#39;D&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">to_timestamp</span><span class="p">()</span>
<span class="c1"># if rounded datetime equals the datetime itself, it means there was no</span>
<span class="c1"># hour/minute/second information to begin with. Mind the &#39;localize&#39;, which</span>
<span class="c1"># keeps time-zone handling from interfering with the comparison.</span>
<span class="n">is_date</span> <span class="o">=</span> <span class="n">dt_d</span> <span class="o">==</span> <span class="n">as_dt</span><span class="o">.</span><span class="n">tz_localize</span><span class="p">(</span><span class="kc">None</span><span class="p">)</span>
<span class="k">if</span> <span class="n">is_date</span><span class="p">:</span>
<span class="k">return</span> <span class="n">dtype</span><span class="o">.</span><span class="n">date</span>
<span class="k">if</span> <span class="n">is_datetime</span><span class="p">:</span>
<span class="k">return</span> <span class="n">dtype</span><span class="o">.</span><span class="n">datetime</span>

<span class="k">return</span> <span class="kc">None</span></div>


<span class="k">def</span> <span class="nf">count_data_types_in_column</span><span class="p">(</span><span class="n">data</span><span class="p">):</span>
Expand Down Expand Up @@ -559,7 +599,7 @@ <h1>Source code for infer</h1><div class="highlight"><pre>
<span class="n">population_size</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
<span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;Analyzing a sample of </span><span class="si">{</span><span class="n">sample_size</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span>
<span class="sa">f</span><span class="s1">&#39;from a total population of </span><span class="si">{</span><span class="n">population_size</span><span class="si">}</span><span class="s1">, this is equivalent to </span><span class="si">{</span><span class="nb">round</span><span class="p">(</span><span class="n">sample_size</span><span class="o">*</span><span class="mi">100</span><span class="o">/</span><span class="n">population_size</span><span class="p">,</span><span class="w"> </span><span class="mi">1</span><span class="p">)</span><span class="si">}</span><span class="s1">% of your data.&#39;</span><span class="p">)</span> <span class="c1"># noqa</span>
<span class="sa">f</span><span class="s1">&#39;from a total population of </span><span class="si">{</span><span class="n">population_size</span><span class="si">}</span><span class="s1">, this is equivalent to </span><span class="si">{</span><span class="nb">round</span><span class="p">(</span><span class="n">sample_size</span><span class="o">*</span><span class="mi">100</span><span class="o">/</span><span class="n">population_size</span><span class="p">,</span><span class="w"> </span><span class="mi">1</span><span class="p">)</span><span class="si">}</span><span class="s1">% of your data.&#39;</span><span class="p">)</span> <span class="c1"># noqa</span>

<span class="n">nr_procs</span> <span class="o">=</span> <span class="n">get_nr_procs</span><span class="p">(</span><span class="n">df</span><span class="o">=</span><span class="n">sample_df</span><span class="p">)</span>
<span class="n">pool_size</span> <span class="o">=</span> <span class="nb">min</span><span class="p">(</span><span class="n">nr_procs</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">sample_df</span><span class="o">.</span><span class="n">columns</span><span class="o">.</span><span class="n">values</span><span class="p">))</span>
Expand Down
1 change: 1 addition & 0 deletions _static/pygments.css
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ span.linenos.special { color: #000000; background-color: #ffffc0; padding-left:
.highlight .cs { color: #408090; background-color: #fff0f0 } /* Comment.Special */
.highlight .gd { color: #A00000 } /* Generic.Deleted */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */
.highlight .gr { color: #FF0000 } /* Generic.Error */
.highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */
.highlight .gi { color: #00A000 } /* Generic.Inserted */
Expand Down
2 changes: 2 additions & 0 deletions genindex.html
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,8 @@ <h2 id="T">T</h2>
</li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="infer.html#infer.type_check_date">type_check_date() (in module infer)</a>
</li>
<li><a href="base.html#base.TypeInformation">TypeInformation (class in base)</a>
</li>
</ul></td>
Expand Down
3 changes: 2 additions & 1 deletion index.html
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ <h1>Type Infer<a class="headerlink" href="#type-infer" title="Permalink to this
<dd class="field-odd"><p>0.0.15</p>
</dd>
<dt class="field-even">Date</dt>
<dd class="field-even"><p>Aug 01, 2023</p>
<dd class="field-even"><p>Aug 21, 2023</p>
</dd>
</dl>
<div class="line-block">
Expand Down Expand Up @@ -298,6 +298,7 @@ <h2>Other Links<a class="headerlink" href="#other-links" title="Permalink to thi
<li class="toctree-l2"><a class="reference internal" href="infer.html#infer.get_column_data_type">get_column_data_type</a></li>
<li class="toctree-l2"><a class="reference internal" href="infer.html#infer.get_numeric_type">get_numeric_type</a></li>
<li class="toctree-l2"><a class="reference internal" href="infer.html#infer.infer_types">infer_types</a></li>
<li class="toctree-l2"><a class="reference internal" href="infer.html#infer.type_check_date">type_check_date</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="helpers.html"><code class="xref py py-mod docutils literal notranslate"><span class="pre">Helpers</span></code></a><ul>
Expand Down
12 changes: 12 additions & 0 deletions infer.html
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@
<li class="toctree-l2"><a class="reference internal" href="#infer.get_column_data_type">get_column_data_type</a></li>
<li class="toctree-l2"><a class="reference internal" href="#infer.get_numeric_type">get_numeric_type</a></li>
<li class="toctree-l2"><a class="reference internal" href="#infer.infer_types">infer_types</a></li>
<li class="toctree-l2"><a class="reference internal" href="#infer.type_check_date">type_check_date</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="helpers.html"><code class="xref py py-mod docutils literal notranslate"><span class="pre">Helpers</span></code></a></li>
Expand Down Expand Up @@ -260,6 +261,17 @@ <h1><code class="xref py py-mod docutils literal notranslate"><span class="pre">
</dl>
</dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="infer.type_check_date">
<span class="sig-prename descclassname"><span class="pre">infer.</span></span><span class="sig-name descname"><span class="pre">type_check_date</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">element</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/infer.html#type_check_date"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#infer.type_check_date" title="Permalink to this definition"></a></dt>
<dd><p>Check if element corresponds to a date-like object.</p>
<dl class="field-list simple">
<dt class="field-odd">Return type</dt>
<dd class="field-odd"><p><code class="xref py py-class docutils literal notranslate"><span class="pre">str</span></code></p>
</dd>
</dl>
</dd></dl>

</div>


Expand Down
Binary file modified objects.inv
Binary file not shown.
Loading

0 comments on commit 80da28d

Please sign in to comment.