diff --git a/P/019_Plagiarism_Checker.ipynb b/P/019_Plagiarism_Checker.ipynb new file mode 100644 index 00000000..9f95d77f --- /dev/null +++ b/P/019_Plagiarism_Checker.ipynb @@ -0,0 +1,156 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "All the IPython Notebooks in **Python Mini-Projects** series by Dr. Milaan Parmar are available @ **[GitHub](https://github.com/milaan9/91_Python_Mini_Projects)**\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Python Program to Check Plagiarism " + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "ExecuteTime": { + "end_time": "2021-10-05T13:06:11.749110Z", + "start_time": "2021-10-05T13:06:09.467318Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Similarity data:\n", + " ('Ben.txt', 'Clark.txt', 0.408904884400347)\n", + "Similarity data:\n", + " ('Arthur.txt', 'Clark.txt', 0.5430431121089816)\n", + "Similarity data:\n", + " ('Arthur.txt', 'Ben.txt', 0.4595329317649595)\n" + ] + } + ], + "source": [ + "'''\n", + "Python Program to Check Plagiarism \n", + "'''\n", + "\n", + "# Import necessary modules!\n", + "import os\n", + "from sklearn.feature_extraction.text import TfidfVectorizer\n", + "from sklearn.metrics.pairwise import cosine_similarity\n", + "\n", + "student_files = [doc for doc in os.listdir() if doc.endswith('.txt')]\n", + "student_notes = [open(_file, encoding='utf-8').read()\n", + " for _file in student_files]\n", + "\n", + "\n", + "def vectorize(Text): return TfidfVectorizer().fit_transform(Text).toarray()\n", + "def similarity(doc1, doc2): return cosine_similarity([doc1, doc2])\n", + "\n", + "\n", + "vectors = vectorize(student_notes)\n", + "s_vectors = list(zip(student_files, vectors))\n", + "plagiarism_results = set()\n", + "\n", + "\n", + "def check_plagiarism():\n", + " global s_vectors\n", + " for student_a, text_vector_a in s_vectors:\n", + " new_vectors = 
s_vectors.copy()\n", + " current_index = new_vectors.index((student_a, text_vector_a))\n", + " del new_vectors[current_index]\n", + " for student_b, text_vector_b in new_vectors:\n", + " sim_score = similarity(text_vector_a, text_vector_b)[0][1]\n", + " student_pair = sorted((student_a, student_b))\n", + " score = (student_pair[0], student_pair[1], sim_score)\n", + " plagiarism_results.add(score)\n", + " return plagiarism_results\n", + "\n", + "\n", + "for data in check_plagiarism():\n", + " print(\"Similarity data:\\n\", data) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "hide_input": false, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + }, + "varInspector": { + "cols": { + "lenName": 16, + "lenType": 16, + "lenVar": 40 + }, + "kernels_config": { + "python": { + "delete_cmd_postfix": "", + "delete_cmd_prefix": "del ", + "library": "var_list.py", + "varRefreshCmd": "print(var_dic_list())" + }, + "r": { + "delete_cmd_postfix": ") ", + "delete_cmd_prefix": "rm(", + "library": "var_list.r", + "varRefreshCmd": "cat(var_dic_list()) " + } + }, + "types_to_exclude": [ + "module", + "function", + "builtin_function_or_method", + "instance", + "_Feature" + ], + "window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/P/Arthur.txt b/P/Arthur.txt new file mode 
100644 index 00000000..9fa79b09 --- /dev/null +++ b/P/Arthur.txt @@ -0,0 +1 @@ +Success can mean a variety of different things. Success is, quite simply, the accomplishment of a predetermined goal. To some people it could mean making money, cultivate and develop certain basic qualities, to others it could mean keeping everyone happy, but to me, it means achieving the goals and objective I have set for myself for my life. Besides working on your goals that would lead a person towards success it is very important to push your limit every day, take charge of your life, and keep learning. This experience enables us to think smartly to solve a critical problem and achieve success. It is very important to take care of your mind which could be done by eliminating negative thoughts and negative people from your life. I think in order to call something successful, both the result and the process should be great. Without success, you, the group, your company, your goals, dreams and even entire civilizations cease to survive. \ No newline at end of file diff --git a/P/Ben.txt b/P/Ben.txt new file mode 100644 index 00000000..f86f3729 --- /dev/null +++ b/P/Ben.txt @@ -0,0 +1 @@ +In order to be successful one needs cultivate and develop certain basic qualities. Success is, quite simply, the accomplishment of a predetermined goal. We consciously or subconsciously set goals for ourselves all the time. First of all, you must know aim and objective of your life. Unless you know your destination, you cannot set out on a journey. At first, you must by very clear in your objectives to be achieved. Finally, you should enjoy the overall process rather than the final outcome. There is no shortcut to success. Hard work is the only key to achieving it; it teaches us discipline, dedication and determination. There is no single right way to be successful. What works for you might not work for someone else. 
\ No newline at end of file diff --git a/P/Clark.txt b/P/Clark.txt new file mode 100644 index 00000000..f754a3c8 --- /dev/null +++ b/P/Clark.txt @@ -0,0 +1 @@ +Success is, quite simply, the accomplishment of a predetermined goal. Success is considered to be a term that describes two things. It also the combination of variety of different things. The first one is achievement of a certain major or minor goal. This could be succeeding in making a delicious dinner, or a more global thing succeeding in a career or job. The second definition of success is more broad and subjective. Success provides confidence, security, a sense of well-being, the ability to contribute at a greater level, hope and leadership. This experience enables us to think smartly to solve a critical problem and achieve success. Without success, you, the group, your company, your goals, dreams and even entire civilizations cease to survive. \ No newline at end of file diff --git a/P/README.md b/P/README.md new file mode 100644 index 00000000..ca83e675 --- /dev/null +++ b/P/README.md @@ -0,0 +1,96 @@ +
+ + + +# Plagiarism Checker + +In this class, you'll learn how to check similarity between text (.txt) documents using cosine similarity. + +In order to compute the similarity between two text documents, the raw textual data is transformed into vectors ➡ **arrays of numbers**, and from there we use basic knowledge of vectors to compute the similarity between them. + + +
+
+