Skip to content

Commit

Permalink
User-Friendly Output for Plagiarism Checker.
Browse files Browse the repository at this point in the history
  • Loading branch information
DevIos01 committed Dec 19, 2023
1 parent bfb2337 commit b802ae9
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 29 deletions.
31 changes: 10 additions & 21 deletions .github/scripts/extract_percentages.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,31 +22,17 @@ def extract_similarity_percentage(html_file):
return None

def process_html_files(directory, threshold=50):
results = {}
log("Processing HTML files for plagiarism results...")
high_plagiarism_detected = False
for filename in os.listdir(directory):
if filename.endswith(".html"):
file_path = os.path.join(directory, filename)
percentage = extract_similarity_percentage(file_path)
if percentage is not None:
results[filename.replace('.html', '.js')] = percentage
log(f"Extracted {percentage}% similarity from {filename}")
if percentage is not None and percentage >= threshold:
log(f"High plagiarism detected - {filename.replace('.html', '.js')}: {percentage}%")
high_plagiarism_detected = True

filtered_sorted_results = sorted(
((file, percent) for file, percent in results.items() if percent >= threshold),
key=lambda x: x[1], reverse=True
)

with open('plagiarism_results.txt', 'w') as output_file:
log("Writing results to plagiarism_results.txt")
output_file.write("Filtered and Sorted Results (Above Threshold):\n")
for file, percent in filtered_sorted_results:
line = f"{file}: {percent}%\n"
output_file.write(line)
log(line.strip())
if not filtered_sorted_results:
output_file.write("No results exceeding threshold.\n")
log("No results exceeding threshold.")
return high_plagiarism_detected

def main():
if len(sys.argv) != 2:
Expand All @@ -56,8 +42,11 @@ def main():

saved_dir_path = sys.argv[1]
log(f"Received saved directory path: {saved_dir_path}")
process_html_files(saved_dir_path)
log("Extraction of plagiarism percentages completed.")
if process_html_files(saved_dir_path):
log("High plagiarism percentages detected.")
sys.exit(1)
else:
log("No high plagiarism percentages detected.")

if __name__ == "__main__":
main()
12 changes: 4 additions & 8 deletions .github/workflows/check_plagiarism.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,19 +36,15 @@ jobs:

- name: Extract and Display Similarity Percentages
run: python .github/scripts/extract_percentages.py saved_dir/
id: extract-percentages

- name: Upload Compare50 Results as Artifacts
if: always()
uses: actions/upload-artifact@v3
with:
name: compare50-results
path: saved_dir/

- name: Check for High Plagiarism Percentages
if: success()
run: |
if grep -qE ".*: (\d{1,3})%" plagiarism_results.txt; then
echo "Plagiarism percentage over threshold detected."
exit 1
else
echo "No high plagiarism percentages detected."
fi
if: steps.extract-percentages.outcome == 'failure'
run: echo "Plagiarism percentage over threshold detected."

1 comment on commit b802ae9

@vercel
Copy link

@vercel vercel bot commented on b802ae9 Dec 19, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Successfully deployed to the following URLs:

sprig – ./

sprig-beta.hackclub.dev
sprig-git-main.hackclub.dev
sprig.hackclub.com
gamelab.hackclub.com

Please sign in to comment.